From 423fdc50cee3ad86173a27e9726f71cbbfa8c99b Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Tue, 25 Jun 2024 10:37:07 +0200 Subject: [PATCH 01/25] minor formatting and mypy adjustments --- pyproject.toml | 4 ++++ src/backy/client.py | 12 +----------- src/backy/main.py | 20 +++++++++++++------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3a9af314..4af5ff32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,3 +77,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] backy = "backy.main:main" + +[[tool.mypy.overrides]] +module = "backy.*" +check_untyped_defs = true diff --git a/src/backy/client.py b/src/backy/client.py index c16e9a68..e35dacdb 100644 --- a/src/backy/client.py +++ b/src/backy/client.py @@ -230,17 +230,7 @@ async def jobs(self, filter_re=""): if filter_re: backups = list(filter(re.compile(filter_re).search, backups)) for b in backups: - t.add_row( - b, - "-", - "-", - "Dead", - "-", - "", - "-", - "-", - "", - ) + t.add_row(b, "-", "-", "Dead", "-", "", "-", "-", "") rprint(t) print("{} jobs shown".format(len(jobs) + len(backups))) diff --git a/src/backy/main.py b/src/backy/main.py index ffb16ab2..13db028f 100644 --- a/src/backy/main.py +++ b/src/backy/main.py @@ -94,7 +94,8 @@ def status(self, yaml_: bool, revision: str) -> None: pending_changes = sum(1 for r in revs if r.pending_changes) if pending_changes: rprint( - f"[yellow]{pending_changes} pending change(s)[/] (Push changes with `backy push`)" + f"[yellow]{pending_changes} pending change(s)[/] " + "(Push changes with `backy push`)" ) def backup(self, tags: str, force: bool) -> int: @@ -181,7 +182,9 @@ async def run() -> int: if peer not in d.peers: self.log.error( "client-peer-unknown", - _fmt_msg="The peer {peer} is not known. Select a known peer or specify --url and --token.\n" + _fmt_msg="The peer {peer} is not known. " + "Select a known peer or specify --url and " + "--token.\n" "The following peers are known: {known}", peer=peer, known=", ".join(d.peers.keys()), @@ -194,7 +197,8 @@ async def run() -> int: if "token" not in d.api_cli_default: self.log.error( "client-missing-defaults", - _fmt_msg="The config file is missing default parameters. Please specify --url and --token", + _fmt_msg="The config file is missing default " + "parameters. Please specify --url and --token", ) return 1 api = APIClient.from_conf( @@ -271,8 +275,8 @@ def setup_argparser(): type=Path, help=( "file name to write log output in. " - "(default: /var/log/backy.log for `scheduler`, ignored for `client`, " - "$backupdir/backy.log otherwise)" + "(default: /var/log/backy.log for `scheduler`, " + "ignored for `client`, $backupdir/backy.log otherwise)" ), ) parser.add_argument( @@ -435,7 +439,8 @@ def setup_argparser(): # upgrade p = subparsers.add_parser( "upgrade", - help="Upgrade this backup (incl. its data) to the newest supported version", + help="Upgrade this backup (incl. 
its data) to the newest " + "supported version", ) p.set_defaults(func="upgrade") @@ -522,7 +527,8 @@ def setup_argparser(): "--revision", metavar="SPEC", default="all", - help="modify tags for revision SPEC, modifies all if not given (default: %(default)s)", + help="modify tags for revision SPEC, modifies all if not given " + "(default: %(default)s)", ) p.add_argument( "tags", From 1b61103c3f5cc3c91ab93b91e2e89bb808917739 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Tue, 25 Jun 2024 10:47:52 +0200 Subject: [PATCH 02/25] fold timer into utilities, clean up ruff warnings --- src/backy/sources/flyingcircus/source.py | 2 +- src/backy/tests/test_timeout.py | 2 +- src/backy/timeout.py | 51 ----------------- src/backy/utils.py | 70 ++++++++++++++++++++---- 4 files changed, 62 insertions(+), 63 deletions(-) delete mode 100644 src/backy/timeout.py diff --git a/src/backy/sources/flyingcircus/source.py b/src/backy/sources/flyingcircus/source.py index 92929ee3..bce8400a 100644 --- a/src/backy/sources/flyingcircus/source.py +++ b/src/backy/sources/flyingcircus/source.py @@ -5,7 +5,7 @@ import consulate from structlog.stdlib import BoundLogger -from ...timeout import TimeOut, TimeOutError +from ...utils import TimeOut, TimeOutError from ..ceph.source import CephRBD diff --git a/src/backy/tests/test_timeout.py b/src/backy/tests/test_timeout.py index fdb18e44..ceaaea51 100644 --- a/src/backy/tests/test_timeout.py +++ b/src/backy/tests/test_timeout.py @@ -1,6 +1,6 @@ import pytest -from backy.timeout import TimeOut, TimeOutError +from backy.utils import TimeOut, TimeOutError def test_timeout(capsys): diff --git a/src/backy/timeout.py b/src/backy/timeout.py deleted file mode 100644 index 12dfe263..00000000 --- a/src/backy/timeout.py +++ /dev/null @@ -1,51 +0,0 @@ -# Vendored from fc.qemu. - -import time - - -class TimeOutError(RuntimeError): - pass - - -class TimeOut(object): - def __init__(self, timeout, interval=1, raise_on_timeout=False): - """Creates a timeout controller. - - TimeOut is typically used in a while loop to retry a command - for a while, e.g. for polling. Example:: - - timeout = TimeOut() - while timeout.tick(): - do_something - """ - - self.remaining = timeout - self.cutoff = time.time() + timeout - self.interval = interval - self.timed_out = False - self.first = True - self.raise_on_timeout = raise_on_timeout - - def tick(self): - """Perform a `tick` for this timeout. - - Returns True if we should keep going or False if not. - - Instead of returning False this can raise an exception - if raise_on_timeout is set. 
- """ - - self.remaining = self.cutoff - time.time() - self.timed_out = self.remaining <= 0 - - if self.timed_out: - if self.raise_on_timeout: - raise TimeOutError() - else: - return False - - if self.first: - self.first = False - else: - time.sleep(self.interval) - return True diff --git a/src/backy/utils.py b/src/backy/utils.py index 90357ac9..b01fa489 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -14,12 +14,12 @@ from asyncio import Event from os import DirEntry from typing import IO, Callable, Iterable, List, Literal, Optional, TypeVar -from zoneinfo import ZoneInfo import aiofiles.os as aos import humanize import structlog import tzlocal +from zoneinfo import ZoneInfo from .ext_deps import CP from .fallocate import punch_hole @@ -473,7 +473,9 @@ async def has_recent_changes(path: str, reference_time: float) -> bool: return False -async def delay_or_event(delay: float, event: Event) -> Optional[Literal[True]]: +async def delay_or_event( + delay: float, event: Event +) -> Optional[Literal[True]]: return await next( asyncio.as_completed([asyncio.sleep(delay), event.wait()]) ) @@ -508,29 +510,77 @@ def duplicates(a: List[_T], b: List[_T]) -> List[_T]: return unique(i for i in a if i in b) -def list_rindex(l: List[_T], v: _T) -> int: - return len(l) - l[-1::-1].index(v) - 1 +def list_rindex(L: List[_T], v: _T) -> int: + return len(L) - L[-1::-1].index(v) - 1 @typing.overload -def list_get(l: List[_T], i: int) -> _T | None: +def list_get(L: List[_T], i: int) -> _T | None: ... @typing.overload -def list_get(l: List[_T], i: int, default: _U) -> _T | _U: +def list_get(L: List[_T], i: int, default: _U) -> _T | _U: ... -def list_get(l, i, default=None): - return l[i] if -len(l) <= i < len(l) else default +def list_get(L, i, default=None): + return L[i] if -len(L) <= i < len(L) else default -def list_split(l: List[_T], v: _T) -> List[List[_T]]: +def list_split(L: List[_T], v: _T) -> List[List[_T]]: res: List[List[_T]] = [[]] - for i in l: + for i in L: if i == v: res.append([]) else: res[-1].append(i) return res + + +class TimeOutError(RuntimeError): + pass + + +class TimeOut(object): + def __init__(self, timeout, interval=1, raise_on_timeout=False): + """Creates a timeout controller. + + TimeOut is typically used in a while loop to retry a command + for a while, e.g. for polling. Example:: + + timeout = TimeOut() + while timeout.tick(): + do_something + """ + + self.remaining = timeout + self.cutoff = time.time() + timeout + self.interval = interval + self.timed_out = False + self.first = True + self.raise_on_timeout = raise_on_timeout + + def tick(self): + """Perform a `tick` for this timeout. + + Returns True if we should keep going or False if not. + + Instead of returning False this can raise an exception + if raise_on_timeout is set. 
+ """ + + self.remaining = self.cutoff - time.time() + self.timed_out = self.remaining <= 0 + + if self.timed_out: + if self.raise_on_timeout: + raise TimeOutError() + else: + return False + + if self.first: + self.first = False + else: + time.sleep(self.interval) + return True From 333be729cab7d0d497f63a5fcd2585c85c1e4a91 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Tue, 25 Jun 2024 13:54:47 +0200 Subject: [PATCH 03/25] snapshot --- pyproject.toml | 5 +- setup.py | 10 +- src/backy/{main.py => cli/__init__.py} | 81 ++-------- src/backy/{ => cli}/client.py | 0 src/backy/{daemon.py => daemon/__init__.py} | 121 +++++++-------- src/backy/{ => daemon}/api.py | 0 src/backy/{ => daemon}/scheduler.py | 0 src/backy/fallocate.py | 66 -------- src/backy/rbd/__init__.py | 144 ++++++++++++++++++ src/backy/{ => rbd}/backends/__init__.py | 0 .../{ => rbd}/backends/chunked/__init__.py | 0 src/backy/{ => rbd}/backends/chunked/chunk.py | 0 src/backy/{ => rbd}/backends/chunked/file.py | 0 src/backy/{ => rbd}/backends/chunked/store.py | 0 .../backends/chunked/tests/test_backend.py | 0 .../backends/chunked/tests/test_chunk.py | 0 .../backends/chunked/tests/test_file.py | 0 src/backy/{ => rbd}/backends/cowfile.py | 0 src/backy/{ => rbd}/backup.py | 0 src/backy/{ => rbd}/quarantine.py | 0 src/backy/{ => rbd}/sources/__init__.py | 0 src/backy/{ => rbd}/sources/ceph/__init__.py | 0 src/backy/{ => rbd}/sources/ceph/diff.py | 0 src/backy/{ => rbd}/sources/ceph/rbd.py | 0 src/backy/{ => rbd}/sources/ceph/source.py | 0 .../{ => rbd}/sources/ceph/tests/conftest.py | 0 .../sources/ceph/tests/nodata.rbddiff | Bin .../sources/ceph/tests/test_ceph_source.py | 0 .../{ => rbd}/sources/ceph/tests/test_diff.py | 0 .../{ => rbd}/sources/ceph/tests/test_rbd.py | 0 src/backy/{ => rbd}/sources/file.py | 0 .../sources/flyingcircus/__init__.py | 0 .../{ => rbd}/sources/flyingcircus/source.py | 0 .../sources/flyingcircus/tests/test_source.py | 0 src/backy/s3/__init__.py | 1 + src/backy/utils.py | 67 +++++++- 36 files changed, 278 insertions(+), 217 deletions(-) rename src/backy/{main.py => cli/__init__.py} (88%) rename src/backy/{ => cli}/client.py (100%) rename src/backy/{daemon.py => daemon/__init__.py} (81%) rename src/backy/{ => daemon}/api.py (100%) rename src/backy/{ => daemon}/scheduler.py (100%) delete mode 100644 src/backy/fallocate.py create mode 100644 src/backy/rbd/__init__.py rename src/backy/{ => rbd}/backends/__init__.py (100%) rename src/backy/{ => rbd}/backends/chunked/__init__.py (100%) rename src/backy/{ => rbd}/backends/chunked/chunk.py (100%) rename src/backy/{ => rbd}/backends/chunked/file.py (100%) rename src/backy/{ => rbd}/backends/chunked/store.py (100%) rename src/backy/{ => rbd}/backends/chunked/tests/test_backend.py (100%) rename src/backy/{ => rbd}/backends/chunked/tests/test_chunk.py (100%) rename src/backy/{ => rbd}/backends/chunked/tests/test_file.py (100%) rename src/backy/{ => rbd}/backends/cowfile.py (100%) rename src/backy/{ => rbd}/backup.py (100%) rename src/backy/{ => rbd}/quarantine.py (100%) rename src/backy/{ => rbd}/sources/__init__.py (100%) rename src/backy/{ => rbd}/sources/ceph/__init__.py (100%) rename src/backy/{ => rbd}/sources/ceph/diff.py (100%) rename src/backy/{ => rbd}/sources/ceph/rbd.py (100%) rename src/backy/{ => rbd}/sources/ceph/source.py (100%) rename src/backy/{ => rbd}/sources/ceph/tests/conftest.py (100%) rename src/backy/{ => rbd}/sources/ceph/tests/nodata.rbddiff (100%) rename src/backy/{ => rbd}/sources/ceph/tests/test_ceph_source.py (100%) rename 
src/backy/{ => rbd}/sources/ceph/tests/test_diff.py (100%) rename src/backy/{ => rbd}/sources/ceph/tests/test_rbd.py (100%) rename src/backy/{ => rbd}/sources/file.py (100%) rename src/backy/{ => rbd}/sources/flyingcircus/__init__.py (100%) rename src/backy/{ => rbd}/sources/flyingcircus/source.py (100%) rename src/backy/{ => rbd}/sources/flyingcircus/tests/test_source.py (100%) create mode 100644 src/backy/s3/__init__.py diff --git a/pyproject.toml b/pyproject.toml index 4af5ff32..3c38f17a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,10 @@ requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] -backy = "backy.main:main" +backy = "backy.cli:main" +backyd = "backy.daemon:main" +backy-rbd = "backy.rbd:main" +backy-s3 = "backy.s3:main" [[tool.mypy.overrides]] module = "backy.*" diff --git a/setup.py b/setup.py index feb0069c..6a30f027 100644 --- a/setup.py +++ b/setup.py @@ -71,10 +71,6 @@ def long_desc(): "pytest-timeout", ], }, - entry_points=""" - [console_scripts] - backy = backy.main:main - """, author=( "Christian Theune , " "Christian Kauhaus , " @@ -97,11 +93,7 @@ def long_desc(): Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Topic :: System :: Archiving :: Backup -"""[ - :-1 - ].split( - "\n" - ), +"""[:-1].split("\n"), description=__doc__.strip(), long_description=long_desc(), packages=find_packages("src"), diff --git a/src/backy/main.py b/src/backy/cli/__init__.py similarity index 88% rename from src/backy/main.py rename to src/backy/cli/__init__.py index 13db028f..32943346 100644 --- a/src/backy/main.py +++ b/src/backy/cli/__init__.py @@ -17,10 +17,10 @@ from structlog.stdlib import BoundLogger import backy.daemon +from backy import logging +from backy.backup import Backup, RestoreBackend from backy.utils import format_datetime_local, generate_taskid -from . 
import logging -from .backup import Backup, RestoreBackend from .client import APIClient, CLIClient @@ -242,33 +242,25 @@ def tags( return int(not success) def expire(self) -> None: + # XXX needs to update from remote API peers first (pull) b = backy.backup.Backup(self.path, self.log) b.expire() b.warn_pending_changes() - def push(self, config: Path) -> int: - d = backy.daemon.BackyDaemon(config, self.log) - d._read_config() - b = backy.backup.Backup(self.path, self.log) - errors = asyncio.run(b.push_metadata(d.peers, self.taskid)) - return int(bool(errors)) - def pull(self, config: Path) -> int: - d = backy.daemon.BackyDaemon(config, self.log) - d._read_config() - b = backy.backup.Backup(self.path, self.log) - errors = asyncio.run(b.pull_metadata(d.peers, self.taskid)) - return int(bool(errors)) +def setup_argparser(): + return parser, client -def setup_argparser(): +def main(): parser = argparse.ArgumentParser( - description="Backup and restore for block devices.", + description="Backy command line client.", ) parser.add_argument( "-v", "--verbose", action="store_true", help="verbose output" ) + parser.add_argument( "-l", "--logfile", @@ -544,69 +536,14 @@ def setup_argparser(): ) p.set_defaults(func="expire") - # PUSH - p = subparsers.add_parser( - "push", - help="Push pending changes to remote servers", - ) - p.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - p.set_defaults(func="push") - - # PULL - p = subparsers.add_parser( - "pull", - help="Push pending changes to remote servers", - ) - p.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - p.set_defaults(func="pull") - - return parser, client - - -def main(): - parser, client_parser = setup_argparser() - args = parser.parse_args() - if not hasattr(args, "func"): parser.print_usage() sys.exit(0) - if args.func == "client" and not hasattr(args, "apifunc"): - client_parser.print_usage() - sys.exit(0) - - default_logfile: Optional[Path] - match args.func: - case "scheduler": - default_logfile = Path("/var/log/backy.log") - case "client": - default_logfile = None - case _: - default_logfile = args.backupdir / "backy.log" - - match (args.func, vars(args).get("apifunc")): - case ("scheduler", _): - default_job_name = "-" - case ("client", "check"): - default_job_name = "-" - case _: - default_job_name = "" # Logging logging.init_logging( args.verbose, - args.logfile or default_logfile, + args.logfile, defaults={"job_name": default_job_name, "taskid": args.taskid}, ) log = structlog.stdlib.get_logger(subsystem="command") diff --git a/src/backy/client.py b/src/backy/cli/client.py similarity index 100% rename from src/backy/client.py rename to src/backy/cli/client.py diff --git a/src/backy/daemon.py b/src/backy/daemon/__init__.py similarity index 81% rename from src/backy/daemon.py rename to src/backy/daemon/__init__.py index 03eb1764..9ce07c86 100644 --- a/src/backy/daemon.py +++ b/src/backy/daemon/__init__.py @@ -1,5 +1,9 @@ +# -*- encoding: utf-8 -*- + +import argparse import asyncio import datetime +import errno import fcntl import os import os.path as p @@ -7,13 +11,22 @@ import sys import time from pathlib import Path -from typing import IO, List, Optional, Pattern, TypedDict +from typing import IO, List, Literal, Optional, Pattern, TypedDict import aiofiles.os as aos import aioshutil +import humanize +import structlog +import tzlocal import yaml +from aiohttp import ClientConnectionError +from rich import 
print as rprint +from rich.table import Column, Table from structlog.stdlib import BoundLogger +from backy.utils import format_datetime_local, generate_taskid + +from . import logging from .api import BackyAPI from .backup import Backup from .revision import filter_manual_tags @@ -276,71 +289,12 @@ async def shutdown_loop(self): self.log.info("stopping-loop") self.loop.stop() - class StatusDict(TypedDict): - job: str - sla: str - sla_overdue: int - status: str - last_time: Optional[datetime.datetime] - last_tags: Optional[str] - last_duration: Optional[float] - next_time: Optional[datetime.datetime] - next_tags: Optional[str] - manual_tags: str - quarantine_reports: int - unsynced_revs: int - local_revs: int - - def status( - self, filter_re: Optional[Pattern[str]] = None - ) -> List[StatusDict]: - """Collects status information for all jobs.""" - result: List["BackyDaemon.StatusDict"] = [] - for job in list(self.jobs.values()): - if filter_re and not filter_re.search(job.name): - continue - job.backup.scan() - manual_tags = set() - unsynced_revs = 0 - history = job.backup.clean_history - for rev in history: - manual_tags |= filter_manual_tags(rev.tags) - if rev.pending_changes: - unsynced_revs += 1 - result.append( - dict( - job=job.name, - sla="OK" if job.sla else "TOO OLD", - sla_overdue=job.sla_overdue, - status=job.status, - last_time=history[-1].timestamp if history else None, - last_tags=( - ",".join(job.schedule.sorted_tags(history[-1].tags)) - if history - else None - ), - last_duration=( - history[-1].stats.get("duration", 0) - if history - else None - ), - next_time=job.next_time, - next_tags=( - ",".join(job.schedule.sorted_tags(job.next_tags)) - if job.next_tags - else None - ), - manual_tags=", ".join(manual_tags), - quarantine_reports=len(job.backup.quarantine.report_ids), - unsynced_revs=unsynced_revs, - local_revs=len( - job.backup.get_history(clean=True, local=True) - ), - ) - ) - return result - async def purge_old_files(self): + # This is a safety belt so we do not accidentlly NOT delete old backups + # of deleted VMs. + # XXX This should likely be implemented as a check to indicate that + # we missed a deletion marker and should delete something and not + # silently delete it. while True: try: self.log.info("purge-scanning") @@ -360,6 +314,8 @@ async def purge_old_files(self): await asyncio.sleep(24 * 60 * 60) async def purge_pending_backups(self): + # XXX This isn't to purge "pending backups" but this means + # "process pending purges" ... while True: try: self.log.info("purge-pending-scanning") @@ -380,11 +336,42 @@ async def purge_pending_backups(self): await asyncio.sleep(24 * 60 * 60) -def main(config_file: Path, log: BoundLogger): # pragma: no cover +def main(): + parser = argparse.ArgumentParser( + description="Backy daemon - runs the scheduler and API.", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose output" + ) + parser.add_argument( + "-l", + "--logfile", + default=Path("/var/log/backy.log"), + type=Path, + help=( + "file name to write log output in. 
" + "(default: /var/log/backy.log for `scheduler`, " + "ignored for `client`, $backupdir/backy.log otherwise)" + ), + ) + parser.add_argument( + "-c", + "--config", + type=Path, + default="/etc/backy.conf", + help="(default: %(default)s)", + ) + args = parser.parse_args() + + # Logging + logging.init_logging(args.verbose, args.logfile) + log = structlog.stdlib.get_logger(subsystem="command") + log.debug("invoked", args=" ".join(sys.argv)) + global daemon loop = asyncio.get_event_loop() - daemon = BackyDaemon(config_file, log) + daemon = BackyDaemon(args.config, log) daemon.start(loop) daemon.api_server() daemon.run_forever() diff --git a/src/backy/api.py b/src/backy/daemon/api.py similarity index 100% rename from src/backy/api.py rename to src/backy/daemon/api.py diff --git a/src/backy/scheduler.py b/src/backy/daemon/scheduler.py similarity index 100% rename from src/backy/scheduler.py rename to src/backy/daemon/scheduler.py diff --git a/src/backy/fallocate.py b/src/backy/fallocate.py deleted file mode 100644 index 691c1ab5..00000000 --- a/src/backy/fallocate.py +++ /dev/null @@ -1,66 +0,0 @@ -# Adapted from -# https://github.com/trbs/fallocate/issues/4 - -import ctypes -import ctypes.util -import os - -import structlog - -log = structlog.stdlib.get_logger() - -FALLOC_FL_KEEP_SIZE = 0x01 -FALLOC_FL_PUNCH_HOLE = 0x02 - - -def _fake_fallocate(fd, mode, offset, len_): - log.debug("fallocate-non-hole-punching") - if len_ <= 0: - raise IOError("fallocate: length must be positive") - if mode & FALLOC_FL_PUNCH_HOLE: - old = fd.tell() - fd.seek(offset) - fd.write(b"\x00" * len_) - fd.seek(old) - else: - raise NotImplementedError( - "fake fallocate() supports only hole punching" - ) - - -def _make_fallocate(): - libc_name = ctypes.util.find_library("c") - libc = ctypes.CDLL(libc_name, use_errno=True) - _fallocate = libc.fallocate - c_off_t = ctypes.c_size_t - _fallocate.restype = ctypes.c_int - _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t] - - def fallocate(fd, mode, offset, len_): - if len_ <= 0: - raise IOError("fallocate: length must be positive") - res = _fallocate(fd.fileno(), mode, offset, len_) - if res != 0: - errno = ctypes.get_errno() - raise OSError(errno, "fallocate: " + os.strerror(errno)) - - return fallocate - - -try: - fallocate = _make_fallocate() -except AttributeError: # pragma: no cover - fallocate = _fake_fallocate - - -def punch_hole(f, offset, len_): - """Ensure that the specified byte range is zeroed. - - Depending on the availability of fallocate(), this is either - delegated to the kernel or done manualy. - """ - params = (f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, offset, len_) - try: - fallocate(*params) - except OSError: - _fake_fallocate(*params) diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py new file mode 100644 index 00000000..cfb77673 --- /dev/null +++ b/src/backy/rbd/__init__.py @@ -0,0 +1,144 @@ +# -*- encoding: utf-8 -*- + +import argparse +import asyncio +import errno +import sys +from pathlib import Path +from typing import Literal, Optional + +import humanize +import structlog +import tzlocal +import yaml +from aiohttp import ClientConnectionError +from rich import print as rprint +from rich.table import Column, Table +from structlog.stdlib import BoundLogger + +import backy.daemon +from backy.utils import format_datetime_local, generate_taskid + +from . 
import logging +from .backup import Backup, RestoreBackend + + +def main(): + parser = argparse.ArgumentParser( + description="Backup and restore for block devices.", + ) + + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose output" + ) + parser.add_argument( + "-t", + "--taskid", + default=generate_taskid(), + help="id to include in log messages (default: 4 random base32 chars)", + ) + + subparsers = parser.add_subparsers() + + # BACKUP + p = subparsers.add_parser( + "backup", + help="Perform a backup", + ) + p.add_argument("job", help="Which job to perform a backup for.") + p.add_argument("revision", help="Revision to work on.") + p.set_defaults(func="backup") + + # RESTORE + p = subparsers.add_parser( + "restore", + help="Restore (a given revision) to a given target", + ) + p.add_argument( + "--backend", + type=RestoreBackend, + choices=list(RestoreBackend), + default=RestoreBackend.AUTO, + dest="restore_backend", + help="(default: %(default)s)", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="latest", + help="use revision SPEC as restore source (default: %(default)s)", + ) + p.add_argument( + "target", + metavar="TARGET", + help='Copy backed up revision to TARGET. Use stdout if TARGET is "-"', + ) + p.set_defaults(func="restore") + + # XXX rename to "garbage collect" + p = subparsers.add_parser( + "purge", + help="Purge the backup store (i.e. chunked) from unused data", + ) + p.set_defaults(func="purge") + + p = subparsers.add_parser( + "verify", + help="Verify specified revisions", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="trust:distrusted&local", + help="use revision SPEC to verify (default: %(default)s)", + ) + p.set_defaults(func="verify") + + args = parser.parse_args() + + if not hasattr(args, "func"): + parser.print_usage() + sys.exit(0) + + logfile = args.backupdir / "backy.log" + + # Logging + logging.init_logging( + args.verbose, + args.logfile or default_logfile, + defaults={"taskid": args.taskid}, + ) + log = structlog.stdlib.get_logger(subsystem="command") + log.debug("invoked", args=" ".join(sys.argv)) + + # Pass over to function + func_args = dict(args._get_kwargs()) + del func_args["func"] + del func_args["verbose"] + del func_args["backupdir"] + del func_args["logfile"] + del func_args["taskid"] + + try: + log.debug("parsed", func=args.func, func_args=func_args) + b = Backup(self.path, self.log) + # XXX scheduler? 
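The EDEADLK/EAGAIN handling just below leans on backy's flock-based locking; a sketch of that convention, assuming (as the errno checks here suggest) that the `locked` decorator takes non-blocking fcntl locks:

    import errno
    import fcntl

    def try_exclusive_lock(fd) -> bool:
        # Non-blocking exclusive lock: EAGAIN/EDEADLK signal that
        # another backy process currently holds the lock.
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            return True
        except OSError as e:
            if e.errno in (errno.EAGAIN, errno.EDEADLK):
                return False
            raise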
+ b._clean() + try: + success = b.backup() + ret = int(not success) + except IOError as e: + if e.errno not in [errno.EDEADLK, errno.EAGAIN]: + raise + self.log.warning("backup-currently-locked") + ret = 1 + if isinstance(ret, int): + log.debug("return-code", code=ret) + sys.exit(ret) + log.debug("successful") + sys.exit(0) + except Exception: + log.exception("failed") + sys.exit(1) diff --git a/src/backy/backends/__init__.py b/src/backy/rbd/backends/__init__.py similarity index 100% rename from src/backy/backends/__init__.py rename to src/backy/rbd/backends/__init__.py diff --git a/src/backy/backends/chunked/__init__.py b/src/backy/rbd/backends/chunked/__init__.py similarity index 100% rename from src/backy/backends/chunked/__init__.py rename to src/backy/rbd/backends/chunked/__init__.py diff --git a/src/backy/backends/chunked/chunk.py b/src/backy/rbd/backends/chunked/chunk.py similarity index 100% rename from src/backy/backends/chunked/chunk.py rename to src/backy/rbd/backends/chunked/chunk.py diff --git a/src/backy/backends/chunked/file.py b/src/backy/rbd/backends/chunked/file.py similarity index 100% rename from src/backy/backends/chunked/file.py rename to src/backy/rbd/backends/chunked/file.py diff --git a/src/backy/backends/chunked/store.py b/src/backy/rbd/backends/chunked/store.py similarity index 100% rename from src/backy/backends/chunked/store.py rename to src/backy/rbd/backends/chunked/store.py diff --git a/src/backy/backends/chunked/tests/test_backend.py b/src/backy/rbd/backends/chunked/tests/test_backend.py similarity index 100% rename from src/backy/backends/chunked/tests/test_backend.py rename to src/backy/rbd/backends/chunked/tests/test_backend.py diff --git a/src/backy/backends/chunked/tests/test_chunk.py b/src/backy/rbd/backends/chunked/tests/test_chunk.py similarity index 100% rename from src/backy/backends/chunked/tests/test_chunk.py rename to src/backy/rbd/backends/chunked/tests/test_chunk.py diff --git a/src/backy/backends/chunked/tests/test_file.py b/src/backy/rbd/backends/chunked/tests/test_file.py similarity index 100% rename from src/backy/backends/chunked/tests/test_file.py rename to src/backy/rbd/backends/chunked/tests/test_file.py diff --git a/src/backy/backends/cowfile.py b/src/backy/rbd/backends/cowfile.py similarity index 100% rename from src/backy/backends/cowfile.py rename to src/backy/rbd/backends/cowfile.py diff --git a/src/backy/backup.py b/src/backy/rbd/backup.py similarity index 100% rename from src/backy/backup.py rename to src/backy/rbd/backup.py diff --git a/src/backy/quarantine.py b/src/backy/rbd/quarantine.py similarity index 100% rename from src/backy/quarantine.py rename to src/backy/rbd/quarantine.py diff --git a/src/backy/sources/__init__.py b/src/backy/rbd/sources/__init__.py similarity index 100% rename from src/backy/sources/__init__.py rename to src/backy/rbd/sources/__init__.py diff --git a/src/backy/sources/ceph/__init__.py b/src/backy/rbd/sources/ceph/__init__.py similarity index 100% rename from src/backy/sources/ceph/__init__.py rename to src/backy/rbd/sources/ceph/__init__.py diff --git a/src/backy/sources/ceph/diff.py b/src/backy/rbd/sources/ceph/diff.py similarity index 100% rename from src/backy/sources/ceph/diff.py rename to src/backy/rbd/sources/ceph/diff.py diff --git a/src/backy/sources/ceph/rbd.py b/src/backy/rbd/sources/ceph/rbd.py similarity index 100% rename from src/backy/sources/ceph/rbd.py rename to src/backy/rbd/sources/ceph/rbd.py diff --git a/src/backy/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py 
similarity index 100% rename from src/backy/sources/ceph/source.py rename to src/backy/rbd/sources/ceph/source.py diff --git a/src/backy/sources/ceph/tests/conftest.py b/src/backy/rbd/sources/ceph/tests/conftest.py similarity index 100% rename from src/backy/sources/ceph/tests/conftest.py rename to src/backy/rbd/sources/ceph/tests/conftest.py diff --git a/src/backy/sources/ceph/tests/nodata.rbddiff b/src/backy/rbd/sources/ceph/tests/nodata.rbddiff similarity index 100% rename from src/backy/sources/ceph/tests/nodata.rbddiff rename to src/backy/rbd/sources/ceph/tests/nodata.rbddiff diff --git a/src/backy/sources/ceph/tests/test_ceph_source.py b/src/backy/rbd/sources/ceph/tests/test_ceph_source.py similarity index 100% rename from src/backy/sources/ceph/tests/test_ceph_source.py rename to src/backy/rbd/sources/ceph/tests/test_ceph_source.py diff --git a/src/backy/sources/ceph/tests/test_diff.py b/src/backy/rbd/sources/ceph/tests/test_diff.py similarity index 100% rename from src/backy/sources/ceph/tests/test_diff.py rename to src/backy/rbd/sources/ceph/tests/test_diff.py diff --git a/src/backy/sources/ceph/tests/test_rbd.py b/src/backy/rbd/sources/ceph/tests/test_rbd.py similarity index 100% rename from src/backy/sources/ceph/tests/test_rbd.py rename to src/backy/rbd/sources/ceph/tests/test_rbd.py diff --git a/src/backy/sources/file.py b/src/backy/rbd/sources/file.py similarity index 100% rename from src/backy/sources/file.py rename to src/backy/rbd/sources/file.py diff --git a/src/backy/sources/flyingcircus/__init__.py b/src/backy/rbd/sources/flyingcircus/__init__.py similarity index 100% rename from src/backy/sources/flyingcircus/__init__.py rename to src/backy/rbd/sources/flyingcircus/__init__.py diff --git a/src/backy/sources/flyingcircus/source.py b/src/backy/rbd/sources/flyingcircus/source.py similarity index 100% rename from src/backy/sources/flyingcircus/source.py rename to src/backy/rbd/sources/flyingcircus/source.py diff --git a/src/backy/sources/flyingcircus/tests/test_source.py b/src/backy/rbd/sources/flyingcircus/tests/test_source.py similarity index 100% rename from src/backy/sources/flyingcircus/tests/test_source.py rename to src/backy/rbd/sources/flyingcircus/tests/test_source.py diff --git a/src/backy/s3/__init__.py b/src/backy/s3/__init__.py new file mode 100644 index 00000000..4cbb3021 --- /dev/null +++ b/src/backy/s3/__init__.py @@ -0,0 +1 @@ +# Placeholder for future S3 implementation diff --git a/src/backy/utils.py b/src/backy/utils.py index b01fa489..f032c8ac 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -1,6 +1,8 @@ import asyncio import base64 import contextlib +import ctypes +import ctypes.util import datetime import hashlib import mmap @@ -12,7 +14,6 @@ import time import typing from asyncio import Event -from os import DirEntry from typing import IO, Callable, Iterable, List, Literal, Optional, TypeVar import aiofiles.os as aos @@ -22,7 +23,6 @@ from zoneinfo import ZoneInfo from .ext_deps import CP -from .fallocate import punch_hole _T = TypeVar("_T") _U = TypeVar("_U") @@ -584,3 +584,66 @@ def tick(self): else: time.sleep(self.interval) return True + + +# Adapted from +# https://github.com/trbs/fallocate/issues/4 + + +log = structlog.stdlib.get_logger() + +FALLOC_FL_KEEP_SIZE = 0x01 +FALLOC_FL_PUNCH_HOLE = 0x02 + + +def _fake_fallocate(fd, mode, offset, len_): + log.debug("fallocate-non-hole-punching") + if len_ <= 0: + raise IOError("fallocate: length must be positive") + if mode & FALLOC_FL_PUNCH_HOLE: + old = fd.tell() + fd.seek(offset) + 
fd.write(b"\x00" * len_) + fd.seek(old) + else: + raise NotImplementedError( + "fake fallocate() supports only hole punching" + ) + + +def _make_fallocate(): + libc_name = ctypes.util.find_library("c") + libc = ctypes.CDLL(libc_name, use_errno=True) + _fallocate = libc.fallocate + c_off_t = ctypes.c_size_t + _fallocate.restype = ctypes.c_int + _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t] + + def fallocate(fd, mode, offset, len_): + if len_ <= 0: + raise IOError("fallocate: length must be positive") + res = _fallocate(fd.fileno(), mode, offset, len_) + if res != 0: + errno = ctypes.get_errno() + raise OSError(errno, "fallocate: " + os.strerror(errno)) + + return fallocate + + +try: + fallocate = _make_fallocate() +except AttributeError: # pragma: no cover + fallocate = _fake_fallocate + + +def punch_hole(f, offset, len_): + """Ensure that the specified byte range is zeroed. + + Depending on the availability of fallocate(), this is either + delegated to the kernel or done manualy. + """ + params = (f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, offset, len_) + try: + fallocate(*params) + except OSError: + _fake_fallocate(*params) From 9dc535983798c61d1a9195c29e3b7bcb6555526c Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Tue, 25 Jun 2024 15:26:51 +0200 Subject: [PATCH 04/25] snapshot --- src/backy/backup.py | 410 ++++++++++++++++++ src/backy/daemon/scheduler.py | 134 ++++++ src/backy/rbd/__init__.py | 101 ++--- src/backy/rbd/backends/__init__.py | 4 - src/backy/rbd/backends/cowfile.py | 24 -- src/backy/rbd/backup.py | 648 +---------------------------- src/backy/rbd/revision.py | 187 +++++++++ src/backy/revision.py | 9 - 8 files changed, 777 insertions(+), 740 deletions(-) create mode 100644 src/backy/backup.py delete mode 100644 src/backy/rbd/backends/cowfile.py create mode 100644 src/backy/rbd/revision.py diff --git a/src/backy/backup.py b/src/backy/backup.py new file mode 100644 index 00000000..0bea1e4b --- /dev/null +++ b/src/backy/backup.py @@ -0,0 +1,410 @@ +import datetime +import re +from math import ceil, floor +from pathlib import Path +from typing import IO, List, Literal, Optional, Type + +import tzlocal +import yaml +from structlog.stdlib import BoundLogger + +from backy.utils import ( + duplicates, + list_get, + list_rindex, + list_split, + min_date, + unique, +) + +from .revision import Revision, Trust, filter_schedule_tags +from .schedule import Schedule + + +class Backup(object): + """A backup of a VM.""" + + path: Path + config: dict + schedule: Schedule + history: list[Revision] + log: BoundLogger + + _by_uuid: dict[str, Revision] + + def __init__(self, path: Path, log: BoundLogger): + self.log = log.bind(subsystem="backup") + self.path = path.resolve() + + # Load config from file + try: + with self.path.joinpath("config").open(encoding="utf-8") as f: + self.config = yaml.safe_load(f) + except IOError: + self.log.error( + "could-not-read-config", + _fmt_msg="Could not read config file. 
Is --backupdir correct?", + config_path=str(self.path / "config"), + ) + raise + + self.schedule = Schedule() + self.schedule.configure(self.config["schedule"]) + + @property + def name(self) -> str: + return self.path.name + + def to_dict(self): + return self.config + + def scan(self) -> None: + self.history = [] + self._by_uuid = {} + for f in self.path.glob("*.rev"): + if f.is_symlink(): + # Ignore links that are used to create readable pointers + continue + r = Revision.load(f, self, self.log) + if r.uuid not in self._by_uuid: + self._by_uuid[r.uuid] = r + self.history.append(r) + # The history is stored: oldest first. newest last. + self.history.sort(key=lambda r: r.timestamp) + + def touch(self): + self.path.touch() + + def set_purge_pending(self): + self.path.joinpath(".purge_pending").touch() + + def clear_purge_pending(self): + self.path.joinpath(".purge_pending").unlink(missing_ok=True) + + def get_history( + self, *, clean: bool = False, local: bool = False + ) -> list[Revision]: + return [ + rev + for rev in self.history + if (not clean or "duration" in rev.stats) + and (not local or not rev.server) + ] + + @property + def clean_history(self) -> List[Revision]: + """History without incomplete revisions.""" + return self.get_history(clean=True) + + @property + def local_history(self): + """History without incomplete revisions.""" + return self.get_history(local=True) + + @property + def contains_distrusted(self) -> bool: + return any( + ( + r == Trust.DISTRUSTED + for r in self.get_history(clean=True, local=True) + ) + ) + + def validate_tags(self, tags): + missing_tags = ( + filter_schedule_tags(tags) - self.schedule.schedule.keys() + ) + if missing_tags: + self.log.error( + "unknown-tags", + _fmt_msg="The following tags are missing from the schedule: {unknown_tags}\n" + "Check the config file, add the `manual:` prefix or disable tag validation (-f)", + unknown_tags=", ".join(missing_tags), + ) + raise RuntimeError("Unknown tags") + + def warn_pending_changes(self, revs: Optional[List[Revision]] = None): + revs = revs if revs is not None else self.history + pending = [r for r in revs if r.pending_changes] + if pending: + self.log.warning( + "pending-changes", + _fmt_msg="Synchronize with remote server (backy push) or risk loosing changes", + revisions=",".join(r.uuid for r in pending), + ) + + def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): + revs = revs if revs is not None else self.history + remote = [r for r in revs if r.server] + if remote: + self.log.error( + "remote-revs-disallowed", + _fmt_msg="Can not modify trust state of remote revisions locally.\n" + "Either include a filter to exclude them (local)\n" + "or edit them on the origin server and pull the changes (backy pull)", + revisions=",".join(r.uuid for r in remote), + ) + raise RuntimeError("Remote revs disallowed") + + ################# + # Making backups + + @locked(target=".backup", mode="exclusive") + def _clean(self) -> None: + """Clean-up incomplete revisions.""" + for revision in self.local_history: + if "duration" not in revision.stats: + self.log.warning( + "clean-incomplete", revision_uuid=revision.uuid + ) + revision.remove() + + @locked(target=".backup", mode="exclusive") + def forget(self, revision: str) -> None: + for r in self.find_revisions(revision): + r.remove() + + @locked(target=".backup", mode="exclusive") + def expire(self): + self.schedule.expire(self) + + @locked(target=".backup", mode="exclusive") + def tags( + self, + action: Literal["set", "add", "remove"], + 
revision: str, + tags: set[str], + expect: Optional[set[str]] = None, + autoremove: bool = False, + force=False, + ) -> bool: + self.scan() + revs = self.find_revisions(revision) + if not force and action != "remove": + self.validate_tags(tags) + for r in revs: + if expect is not None and expect != r.tags: + self.log.error("tags-expectation-failed") + return False + for r in revs: + match action: + case "set": + r.tags = tags + case "add": + r.tags |= tags + case "remove": + r.tags -= tags + case _: + raise ValueError(f"invalid action '{action}'") + if not r.tags and autoremove: + r.remove() + else: + r.write_info() + return True + + @locked(target=".backup", mode="exclusive") + def distrust(self, revision: str) -> None: + revs = self.find_revisions(revision) + self.prevent_remote_rev(revs) + for r in revs: + r.distrust() + r.write_info() + + ###################### + # Looking up revisions + + def last_by_tag(self) -> dict[str, datetime.datetime]: + """Return a dictionary showing the last time each tag was + backed up. + + Tags that have never been backed up won't show up here. + + """ + last_times: dict[str, datetime.datetime] = {} + for revision in self.clean_history: + for tag in revision.tags: + last_times.setdefault(tag, min_date()) + last_times[tag] = max([last_times[tag], revision.timestamp]) + return last_times + + def find_revisions( + self, spec: str | List[str | Revision | List[Revision]] + ) -> List[Revision]: + """Get a sorted list of revisions, oldest first, that match the given + specification. + """ + + tokens: List[str | Revision | List[Revision]] + if isinstance(spec, str): + tokens = [ + t.strip() + for t in re.split(r"(\(|\)|,|&|\.\.)", spec) + if t.strip() + ] + else: + tokens = spec + if "(" in tokens and ")" in tokens: + i = list_rindex(tokens, "(") + j = tokens.index(")", i) + prev, middle, next = tokens[:i], tokens[i + 1 : j], tokens[j + 1 :] + + functions = { + "first": lambda x: x[0], + "last": lambda x: x[-1], + "not": lambda x: [r for r in self.history if r not in x], + "reverse": lambda x: list(reversed(x)), + } + if prev and isinstance(prev[-1], str) and prev[-1] in functions: + return self.find_revisions( + prev[:-1] + + [functions[prev[-1]](self.find_revisions(middle))] + + next + ) + return self.find_revisions( + prev + [self.find_revisions(middle)] + next + ) + elif "," in tokens: + i = tokens.index(",") + return unique( + self.find_revisions(tokens[:i]) + + self.find_revisions(tokens[i + 1 :]) + ) + elif "&" in tokens: + i = tokens.index("&") + return duplicates( + self.find_revisions(tokens[:i]), + self.find_revisions(tokens[i + 1 :]), + ) + elif ".." 
in tokens: + _a, _b = list_split(tokens, "..") + assert len(_a) <= 1 and len(_b) <= 1 + a = self.index_by_token(list_get(_a, 0, "first")) + b = self.index_by_token(list_get(_b, 0, "last")) + return self.history[ceil(min(a, b)) : floor(max(a, b)) + 1] + assert len(tokens) == 1 + token = tokens[0] + if isinstance(token, Revision): + return [token] + elif isinstance(token, list): + return token + if token.startswith("server:"): + server = token.removeprefix("server:") + return [r for r in self.history if server == r.server] + elif token.startswith("tag:"): + tag = token.removeprefix("tag:") + return [r for r in self.history if tag in r.tags] + elif token.startswith("trust:"): + trust = Trust(token.removeprefix("trust:").lower()) + return [r for r in self.history if trust == r.trust] + elif token == "all": + return self.history[:] + elif token == "clean": + return self.clean_history + elif token == "local": + return self.find_revisions("server:") + elif token == "remote": + return self.find_revisions("not(server:)") + else: + return [self.find(token)] + + def index_by_token(self, spec: str | Revision | List[Revision]) -> float: + assert not isinstance( + spec, list + ), "can only index a single revision specifier" + if isinstance(spec, str): + return self.index_by_date(spec) or self.history.index( + self.find(spec) + ) + else: + return self.history.index(spec) + + def index_by_date(self, spec: str) -> Optional[float]: + """Return index of revision matched by datetime. + Index may be fractional if there is no exact datetime match. + Index range: [-0.5, len+0.5] + """ + try: + date = datetime.datetime.fromisoformat(spec) + date = date.replace(tzinfo=date.tzinfo or tzlocal.get_localzone()) + l = list_get( + [i for i, r in enumerate(self.history) if r.timestamp <= date], + -1, + -1, + ) + r = list_get( + [i for i, r in enumerate(self.history) if r.timestamp >= date], + 0, + len(self.history), + ) + print(spec, l, r) + assert ( + 0 <= r - l <= 1 + ), "can not index with date if multiple revision have the same timestamp" + return (l + r) / 2.0 + except ValueError: + return None + + def find_by_number(self, _spec: str) -> Revision: + """Returns revision by relative number. + + 0 is the newest, + 1 is the next older, + 2 is the even next older, + and so on ... + + Raises IndexError or ValueError if no revision is found. + """ + spec = int(_spec) + if spec < 0: + raise KeyError("Integer revisions must be positive") + return self.history[-spec - 1] + + def find_by_tag(self, spec: str) -> Revision: + """Returns the latest revision matching a given tag. + + Raises IndexError or ValueError if no revision is found. + """ + if spec in ["last", "latest"]: + return self.history[-1] + if spec == "first": + return self.history[0] + raise ValueError() + + def find_by_uuid(self, spec: str) -> Revision: + """Returns revision matched by UUID. + + Raises IndexError if no revision is found. + """ + try: + return self._by_uuid[spec] + except KeyError: + raise IndexError() + + def find_by_function(self, spec: str) -> Revision: + m = re.fullmatch(r"(\w+)\(.+\)", spec) + if m and m.group(1) in ["first", "last"]: + return self.find_revisions(m.group(0))[0] + raise ValueError() + + def find(self, spec: str) -> Revision: + """Flexible revision search. + + Locates a revision by relative number, by tag, or by uuid. 
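Some illustrative spec strings the find_revisions() grammar above accepts (inferred from the token handling; the tag and peer names are made up):

    backup.find_revisions("last")                 # newest revision
    backup.find_revisions("tag:daily&clean")      # daily-tagged AND completed
    backup.find_revisions("first..2")             # inclusive range, oldest first
    backup.find_revisions("not(server:remote1)")  # everything not on that peer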
+ + """ + spec = spec.strip() + if spec == "" or not self.history: + raise KeyError(spec) + + for find in ( + self.find_by_number, + self.find_by_uuid, + self.find_by_tag, + self.find_by_function, + ): + try: + return find(spec) + except (ValueError, IndexError): + pass + self.log.warning("find-rev-not-found", spec=spec) + raise KeyError(spec) diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index 7c879074..bc7a83c1 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -549,3 +549,137 @@ def stop(self) -> None: self._task.cancel() self._task = None self.update_status("") + + @locked(target=".backup", mode="exclusive") + async def push_metadata(self, peers, taskid: str) -> int: + grouped = defaultdict(list) + for r in self.clean_history: + if r.pending_changes: + grouped[r.server].append(r) + self.log.info( + "push-start", changes=sum(len(l) for l in grouped.values()) + ) + async with APIClientManager(peers, taskid, self.log) as apis: + errors = await asyncio.gather( + *[ + self._push_metadata(apis[server], grouped[server]) + for server in apis + ] + ) + self.log.info("push-end", errors=sum(errors)) + return sum(errors) + + async def _push_metadata( + self, api: APIClient, revs: List[Revision] + ) -> bool: + purge_required = False + error = False + for r in revs: + log = self.log.bind( + server=r.server, + rev_uuid=r.uuid, + ) + log.debug( + "push-updating-tags", + old_tags=r.orig_tags, + new_tags=r.tags, + ) + try: + await api.put_tags(r, autoremove=True) + if r.tags: + r.orig_tags = r.tags + r.write_info() + else: + r.remove(force=True) + purge_required = True + except ClientResponseError: + log.warning("push-client-error", exc_style="short") + error = True + except ClientConnectionError: + log.warning("push-connection-error", exc_style="short") + error = True + except ClientError: + log.exception("push-error") + error = True + + if purge_required: + log = self.log.bind(server=api.server_name) + log.debug("push-purging-remote") + try: + await api.run_purge(self.name) + except ClientResponseError: + log.warning("push-purge-client-error", exc_style="short") + error = True + except ClientConnectionError: + log.warning("push-purge-connection-error", exc_style="short") + error = True + except ClientError: + log.error("push-purge-error") + error = True + return error + + @locked(target=".backup", mode="exclusive") + async def pull_metadata(self, peers: dict, taskid: str) -> int: + async def remove_dead_peer(): + for r in list(self.history): + if r.server and r.server not in peers: + self.log.info( + "pull-removing-dead-peer", + rev_uuid=r.uuid, + server=r.server, + ) + r.remove(force=True) + return False + + self.log.info("pull-start") + async with APIClientManager(peers, taskid, self.log) as apis: + errors = await asyncio.gather( + remove_dead_peer(), + *[self._pull_metadata(apis[server]) for server in apis], + ) + self.log.info("pull-end", errors=sum(errors)) + return sum(errors) + + async def _pull_metadata(self, api: APIClient) -> bool: + error = False + log = self.log.bind(server=api.server_name) + try: + await api.touch_backup(self.name) + remote_revs = await api.get_revs(self) + log.debug("pull-found-revs", revs=len(remote_revs)) + except ClientResponseError as e: + if e.status in [ + HTTPNotFound.status_code, + HTTPForbidden.status_code, + ]: + log.debug("pull-not-found") + else: + log.warning("pull-client-error", exc_style="short") + error = True + remote_revs = [] + except ClientConnectionError: + 
log.warning("pull-connection-error", exc_style="short") + return True + except ClientError: + log.exception("pull-error") + error = True + remote_revs = [] + + local_uuids = { + r.uuid for r in self.history if r.server == api.server_name + } + remote_uuids = {r.uuid for r in remote_revs} + for uuid in local_uuids - remote_uuids: + log.warning("pull-removing-unknown-rev", rev_uuid=uuid) + self.find_by_uuid(uuid).remove(force=True) + + for r in remote_revs: + if r.uuid in local_uuids: + if r.to_dict() == self.find_by_uuid(r.uuid).to_dict(): + continue + log.debug("pull-updating-rev", rev_uid=r.uuid) + else: + log.debug("pull-new-rev", rev_uid=r.uuid) + r.write_info() + + return error diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index cfb77673..7aa2df5c 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -1,26 +1,14 @@ -# -*- encoding: utf-8 -*- - import argparse -import asyncio import errno import sys from pathlib import Path -from typing import Literal, Optional -import humanize import structlog -import tzlocal -import yaml -from aiohttp import ClientConnectionError -from rich import print as rprint -from rich.table import Column, Table -from structlog.stdlib import BoundLogger -import backy.daemon -from backy.utils import format_datetime_local, generate_taskid +from backy.utils import generate_taskid -from . import logging -from .backup import Backup, RestoreBackend +from .. import logging +from .backup import RbdBackup, RestoreBackend def main(): @@ -35,8 +23,9 @@ def main(): "-t", "--taskid", default=generate_taskid(), - help="id to include in log messages (default: 4 random base32 chars)", + help="ID to include in log messages (default: 4 random base32 chars)", ) + parser.add_argument("-j", "--job", help="Job to work on.") subparsers = parser.add_subparsers() @@ -45,9 +34,8 @@ def main(): "backup", help="Perform a backup", ) - p.add_argument("job", help="Which job to perform a backup for.") - p.add_argument("revision", help="Revision to work on.") p.set_defaults(func="backup") + parser.add_argument("-r", "--revision", help="Revision to work on.") # RESTORE p = subparsers.add_parser( @@ -62,13 +50,7 @@ def main(): dest="restore_backend", help="(default: %(default)s)", ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="latest", - help="use revision SPEC as restore source (default: %(default)s)", - ) + parser.add_argument("-r", "--revision", help="Revision to work on.") p.add_argument( "target", metavar="TARGET", @@ -76,24 +58,19 @@ def main(): ) p.set_defaults(func="restore") - # XXX rename to "garbage collect" + # GC p = subparsers.add_parser( - "purge", - help="Purge the backup store (i.e. 
chunked) from unused data", + "gc", + help="Purge the backup store from unused data", ) - p.set_defaults(func="purge") + p.set_defaults(func="gc") + # VERIFY p = subparsers.add_parser( "verify", - help="Verify specified revisions", - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="trust:distrusted&local", - help="use revision SPEC to verify (default: %(default)s)", + help="Verify specified revision", ) + parser.add_argument("-r", "--revision", help="Revision to work on.") p.set_defaults(func="verify") args = parser.parse_args() @@ -102,43 +79,39 @@ def main(): parser.print_usage() sys.exit(0) - logfile = args.backupdir / "backy.log" + backupdir = Path() # TODO # Logging logging.init_logging( args.verbose, - args.logfile or default_logfile, + backupdir / "backy.log", defaults={"taskid": args.taskid}, ) log = structlog.stdlib.get_logger(subsystem="command") log.debug("invoked", args=" ".join(sys.argv)) - # Pass over to function - func_args = dict(args._get_kwargs()) - del func_args["func"] - del func_args["verbose"] - del func_args["backupdir"] - del func_args["logfile"] - del func_args["taskid"] - try: - log.debug("parsed", func=args.func, func_args=func_args) - b = Backup(self.path, self.log) + b = RbdBackup(backupdir, log) # XXX scheduler? b._clean() - try: - success = b.backup() - ret = int(not success) - except IOError as e: - if e.errno not in [errno.EDEADLK, errno.EAGAIN]: - raise - self.log.warning("backup-currently-locked") - ret = 1 - if isinstance(ret, int): - log.debug("return-code", code=ret) - sys.exit(ret) - log.debug("successful") - sys.exit(0) - except Exception: - log.exception("failed") + ret = 0 + match args.fun: + case "backup": + success = b.backup(args.revision) + ret = int(not success) + case "restore": + b.restore(args.revisions, args.target, args.backend) + case "gc": + b.gc() + case "verify": + b.verify(args.revision) + case _: + raise ValueError("invalid function: " + args.fun) + log.debug("return-code", code=ret) + sys.exit(ret) + except Exception as e: + if isinstance(e, IOError) and e.errno in [errno.EDEADLK, errno.EAGAIN]: + log.warning("backup-currently-locked") + else: + log.exception("failed") sys.exit(1) diff --git a/src/backy/rbd/backends/__init__.py b/src/backy/rbd/backends/__init__.py index a19d7584..b1b7c80c 100644 --- a/src/backy/rbd/backends/__init__.py +++ b/src/backy/rbd/backends/__init__.py @@ -33,9 +33,5 @@ def select_backend(type_: str) -> Type[BackyBackend]: from backy.backends.chunked import ChunkedFileBackend return ChunkedFileBackend - case "cowfile": - from backy.backends.cowfile import COWFileBackend - - return COWFileBackend case _: raise ValueError(f"Invalid backend '{type_}'") diff --git a/src/backy/rbd/backends/cowfile.py b/src/backy/rbd/backends/cowfile.py deleted file mode 100644 index a6ee3307..00000000 --- a/src/backy/rbd/backends/cowfile.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import IO, Optional - -from structlog.stdlib import BoundLogger - -from backy.backends import BackyBackend -from backy.revision import Revision -from backy.utils import CHUNK_SIZE, cp_reflink - - -class COWFileBackend(BackyBackend): - revision: Revision - - def __init__(self, revision: Revision, log: BoundLogger): - assert revision.backend_type == "cowfile" - self.revision = revision - - def open(self, mode: str = "rb", parent: Optional[Revision] = None) -> IO: - if not self.revision.filename.exists(): - if not parent: - self.revision.filename.open("wb").close() - else: - cp_reflink(parent.filename, self.revision.filename) - 
self.revision.writable() - return self.revision.filename.open(mode, buffering=CHUNK_SIZE) diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/backup.py index a50d98a8..6a002174 100644 --- a/src/backy/rbd/backup.py +++ b/src/backy/rbd/backup.py @@ -1,34 +1,18 @@ -import asyncio -import datetime import fcntl import os -import re import subprocess import time -from collections import defaultdict from enum import Enum -from math import ceil, floor from pathlib import Path -from typing import IO, List, Literal, Optional, Type +from typing import IO, Literal -import tzlocal import yaml -from aiohttp import ClientConnectionError, ClientError, ClientResponseError -from aiohttp.web_exceptions import HTTPForbidden, HTTPNotFound from structlog.stdlib import BoundLogger -import backy.backends.chunked -from backy.utils import ( - duplicates, - list_get, - list_rindex, - list_split, - min_date, - unique, -) +import backy.backup.backends.chunked +from ..backup import Backup from .backends import BackendException, BackyBackend, select_backend -from .client import APIClient, APIClientManager from .ext_deps import BACKY_EXTRACT from .quarantine import QuarantineStore from .revision import Revision, Trust, filter_schedule_tags @@ -101,7 +85,7 @@ def locked_function(self, *args, skip_lock=False, **kw): return wrap -class Backup(object): +class RbdBackup(Backup): """A backup of a VM. Provides access to methods to @@ -110,37 +94,17 @@ class Backup(object): """ - path: Path - config: dict - schedule: Schedule source: BackySourceFactory - default_backend_type: Literal["cowfile", "chunked"] - history: list[Revision] quarantine: QuarantineStore - log: BoundLogger - _by_uuid: dict[str, Revision] _lock_fds: dict[str, IO] def __init__(self, path: Path, log: BoundLogger): - self.log = log.bind(subsystem="backup") + super().__init__(path, log) self._lock_fds = {} - self.path = path.resolve() self.scan() - # Load config from file - try: - with self.path.joinpath("config").open(encoding="utf-8") as f: - self.config = yaml.safe_load(f) - except IOError: - self.log.error( - "could-not-read-config", - _fmt_msg="Could not read config file. Is --backupdir correct?", - config_path=str(self.path / "config"), - ) - raise - # Initialize our source try: source_factory = select_source(self.config["source"]["type"]) @@ -153,179 +117,16 @@ def __init__(self, path: Path, log: BoundLogger): raise self.source = source_factory(self.config["source"], self.log) - # Initialize our backend - self.default_backend_type = self.config["source"].get("backend", None) - if self.default_backend_type is None: - if not self.local_history: - # Start fresh backups with our new default. - self.default_backend_type = "chunked" - else: - # Choose to continue existing backups with whatever format - # they are in. - self.default_backend_type = self.local_history[-1].backend_type - - self.schedule = Schedule() - self.schedule.configure(self.config["schedule"]) + assert self.config["source"].get("backend", "chunked") == "chunked" self.quarantine = QuarantineStore(self.path, self.log) - @property - def name(self) -> str: - return self.path.name - - def to_dict(self): - return self.config - - def scan(self) -> None: - self.history = [] - self._by_uuid = {} - for f in self.path.glob("*.rev"): - if f.is_symlink(): - # Ignore links that are used to create readable pointers - continue - r = Revision.load(f, self, self.log) - if r.uuid not in self._by_uuid: - self._by_uuid[r.uuid] = r - self.history.append(r) - # The history is stored: oldest first. newest last. 
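The ordering comment above is the invariant the lookup helpers rely on (for example, find_by_number's history[-spec - 1] indexing); equivalently, as a sketch:

    # After scan(), revisions are sorted oldest -> newest:
    assert all(
        a.timestamp <= b.timestamp
        for a, b in zip(backup.history, backup.history[1:])
    )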
- self.history.sort(key=lambda r: r.timestamp) - - def touch(self): - self.path.touch() - - def set_purge_pending(self): - self.path.joinpath(".purge_pending").touch() - - def clear_purge_pending(self): - self.path.joinpath(".purge_pending").unlink(missing_ok=True) - - def get_history( - self, *, clean: bool = False, local: bool = False - ) -> list[Revision]: - return [ - rev - for rev in self.history - if (not clean or "duration" in rev.stats) - and (not local or not rev.server) - ] - - @property - def clean_history(self) -> List[Revision]: - """History without incomplete revisions.""" - return self.get_history(clean=True) - - @property - def local_history(self): - """History without incomplete revisions.""" - return self.get_history(local=True) - - @property - def contains_distrusted(self) -> bool: - return any( - ( - r == Trust.DISTRUSTED - for r in self.get_history(clean=True, local=True) - ) - ) - - def validate_tags(self, tags): - missing_tags = ( - filter_schedule_tags(tags) - self.schedule.schedule.keys() - ) - if missing_tags: - self.log.error( - "unknown-tags", - _fmt_msg="The following tags are missing from the schedule: {unknown_tags}\n" - "Check the config file, add the `manual:` prefix or disable tag validation (-f)", - unknown_tags=", ".join(missing_tags), - ) - raise RuntimeError("Unknown tags") - - def warn_pending_changes(self, revs: Optional[List[Revision]] = None): - revs = revs if revs is not None else self.history - pending = [r for r in revs if r.pending_changes] - if pending: - self.log.warning( - "pending-changes", - _fmt_msg="Synchronize with remote server (backy push) or risk loosing changes", - revisions=",".join(r.uuid for r in pending), - ) - - def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): - revs = revs if revs is not None else self.history - remote = [r for r in revs if r.server] - if remote: - self.log.error( - "remote-revs-disallowed", - _fmt_msg="Can not modify trust state of remote revisions locally.\n" - "Either include a filter to exclude them (local)\n" - "or edit them on the origin server and pull the changes (backy pull)", - revisions=",".join(r.uuid for r in remote), - ) - raise RuntimeError("Remote revs disallowed") - ################# # Making backups - @locked(target=".backup", mode="exclusive") - def _clean(self) -> None: - """Clean-up incomplete revisions.""" - for revision in self.local_history: - if "duration" not in revision.stats: - self.log.warning( - "clean-incomplete", revision_uuid=revision.uuid - ) - revision.remove() - - @locked(target=".backup", mode="exclusive") - def forget(self, revision: str) -> None: - for r in self.find_revisions(revision): - r.remove() - - @locked(target=".backup", mode="exclusive") - def expire(self): - self.schedule.expire(self) - - @locked(target=".backup", mode="exclusive") - def tags( - self, - action: Literal["set", "add", "remove"], - revision: str, - tags: set[str], - expect: Optional[set[str]] = None, - autoremove: bool = False, - force=False, - ) -> bool: - self.scan() - revs = self.find_revisions(revision) - if not force and action != "remove": - self.validate_tags(tags) - for r in revs: - if expect is not None and expect != r.tags: - self.log.error("tags-expectation-failed") - return False - for r in revs: - match action: - case "set": - r.tags = tags - case "add": - r.tags |= tags - case "remove": - r.tags -= tags - case _: - raise ValueError(f"invalid action '{action}'") - if not r.tags and autoremove: - r.remove() - else: - r.write_info() - return True - 
@locked(target=".backup", mode="exclusive") @locked(target=".purge", mode="shared") - def backup(self, tags: set[str], force: bool = False) -> bool: - if not force: - self.validate_tags(tags) - + def backup(self, revision: str) -> bool: self.path.joinpath("last").unlink(missing_ok=True) self.path.joinpath("last.rev").unlink(missing_ok=True) @@ -336,14 +137,7 @@ def backup(self, tags: set[str], force: bool = False) -> bool: "Source is not ready (does it exist? can you access it?)" ) - new_revision = Revision.create(self, tags, self.log) - new_revision.materialize() - self.log.info( - "created-revision", - revision_uuid=new_revision.uuid, - tags=", ".join(new_revision.tags), - ) - + new_revision = self.find_by_uuid(revision) backend = new_revision.backend with self.source(new_revision) as source: try: @@ -366,7 +160,6 @@ def backup(self, tags: set[str], force: bool = False) -> bool: new_revision.stats["duration"] = time.time() - start new_revision.write_info() new_revision.readonly() - self.scan() # Switched from a fine-grained syncing mechanism to "everything # once" when we're done. This is as safe but much faster. os.sync() @@ -383,14 +176,6 @@ def backup(self, tags: set[str], force: bool = False) -> bool: break return verified - @locked(target=".backup", mode="exclusive") - def distrust(self, revision: str) -> None: - revs = self.find_revisions(revision) - self.prevent_remote_rev(revs) - for r in revs: - r.distrust() - r.write_info() - @locked(target=".purge", mode="shared") def verify(self, revision: str) -> None: revs = self.find_revisions(revision) @@ -399,7 +184,7 @@ def verify(self, revision: str) -> None: r.backend.verify() @locked(target=".purge", mode="exclusive") - def purge(self) -> None: + def gc(self) -> None: self.local_history[-1].backend.purge() self.clear_purge_pending() @@ -494,418 +279,3 @@ def restore_stdout(self, source: IO) -> None: if not chunk: break target.write(chunk) - - @locked(target=".purge", mode="shared") - def upgrade(self) -> None: - """Upgrade this backup's store from cowfile to chunked. - - This can take a long time and is intended to be interruptable. - - We start creating new backups with the new format once everything - is converted as we do not want to interfere with the config file - but allow upgrading without a format specification. - - """ - from backy.backends.chunked import ChunkedFileBackend - from backy.sources.file import File - - last_worklist: List[Revision] = [] - - while True: - self.scan() - to_upgrade: List[Revision] = [ - r - for r in self.get_history(clean=True, local=True) - if r.backend_type == "cowfile" - ] - if not to_upgrade: - break - if to_upgrade == last_worklist: - self.log.error("upgrade-no-progress") - break - last_worklist = to_upgrade - - self.log.info("upgrade-found-new", num_revisions=len(to_upgrade)) - # Upgrade the newest then start again. The revisions may change - # beneath us and this may cause a) new revisions to appear and b) - # old revisions to disappear. We want to upgraded new revisions as - # quickly as possible as having the newest upgraded means that - # then next backup will be able to use the new format and we don't - # have to re-upgrade it again. - try: - revision = to_upgrade[-1] - self.log.info( - "upgrade-converting", - revision_uuid=revision.uuid, - timestamp=revision.timestamp, - ) - original_file = revision.filename.with_suffix( - revision.filename.suffix + ".old" - ) - if not os.path.exists(original_file): - # We may be resuming a partial upgrade. 
Only move the file - # if our .old doesn't exist. - os.rename(revision.filename, original_file) - else: - self.log.info("upgrade-resuming") - if os.path.exists(revision.filename): - os.unlink(revision.filename) - revision.writable() - chunked = ChunkedFileBackend(revision, self.log) - file = File(dict(filename=original_file, cow=False), self.log)( - revision - ) - # Keep a copy of the statistics as it will get replaced when - # running the full copy. - original_stats = revision.stats.copy() - with file as f: - f.backup(chunked) - revision.stats = original_stats - revision.backend_type = "chunked" - revision.write_info() - revision.readonly() - os.unlink(original_file) - except Exception: - self.log.exception("upgrade-error") - # We may be seeing revisions getting removed, try again. - return - - # Wait a bit, to be graceful to the host system just in case this - # truns into a spinning loop. - time.sleep(5) - - ###################### - # Looking up revisions - - def last_by_tag(self) -> dict[str, datetime.datetime]: - """Return a dictionary showing the last time each tag was - backed up. - - Tags that have never been backed up won't show up here. - - """ - last_times: dict[str, datetime.datetime] = {} - for revision in self.clean_history: - for tag in revision.tags: - last_times.setdefault(tag, min_date()) - last_times[tag] = max([last_times[tag], revision.timestamp]) - return last_times - - def find_revisions( - self, spec: str | List[str | Revision | List[Revision]] - ) -> List[Revision]: - """Get a sorted list of revisions, oldest first, that match the given - specification. - """ - - tokens: List[str | Revision | List[Revision]] - if isinstance(spec, str): - tokens = [ - t.strip() - for t in re.split(r"(\(|\)|,|&|\.\.)", spec) - if t.strip() - ] - else: - tokens = spec - if "(" in tokens and ")" in tokens: - i = list_rindex(tokens, "(") - j = tokens.index(")", i) - prev, middle, next = tokens[:i], tokens[i + 1 : j], tokens[j + 1 :] - - functions = { - "first": lambda x: x[0], - "last": lambda x: x[-1], - "not": lambda x: [r for r in self.history if r not in x], - "reverse": lambda x: list(reversed(x)), - } - if prev and isinstance(prev[-1], str) and prev[-1] in functions: - return self.find_revisions( - prev[:-1] - + [functions[prev[-1]](self.find_revisions(middle))] - + next - ) - return self.find_revisions( - prev + [self.find_revisions(middle)] + next - ) - elif "," in tokens: - i = tokens.index(",") - return unique( - self.find_revisions(tokens[:i]) - + self.find_revisions(tokens[i + 1 :]) - ) - elif "&" in tokens: - i = tokens.index("&") - return duplicates( - self.find_revisions(tokens[:i]), - self.find_revisions(tokens[i + 1 :]), - ) - elif ".." 
in tokens: - _a, _b = list_split(tokens, "..") - assert len(_a) <= 1 and len(_b) <= 1 - a = self.index_by_token(list_get(_a, 0, "first")) - b = self.index_by_token(list_get(_b, 0, "last")) - return self.history[ceil(min(a, b)) : floor(max(a, b)) + 1] - assert len(tokens) == 1 - token = tokens[0] - if isinstance(token, Revision): - return [token] - elif isinstance(token, list): - return token - if token.startswith("server:"): - server = token.removeprefix("server:") - return [r for r in self.history if server == r.server] - elif token.startswith("tag:"): - tag = token.removeprefix("tag:") - return [r for r in self.history if tag in r.tags] - elif token.startswith("trust:"): - trust = Trust(token.removeprefix("trust:").lower()) - return [r for r in self.history if trust == r.trust] - elif token == "all": - return self.history[:] - elif token == "clean": - return self.clean_history - elif token == "local": - return self.find_revisions("server:") - elif token == "remote": - return self.find_revisions("not(server:)") - else: - return [self.find(token)] - - def index_by_token(self, spec: str | Revision | List[Revision]) -> float: - assert not isinstance( - spec, list - ), "can only index a single revision specifier" - if isinstance(spec, str): - return self.index_by_date(spec) or self.history.index( - self.find(spec) - ) - else: - return self.history.index(spec) - - def index_by_date(self, spec: str) -> Optional[float]: - """Return index of revision matched by datetime. - Index may be fractional if there is no exact datetime match. - Index range: [-0.5, len+0.5] - """ - try: - date = datetime.datetime.fromisoformat(spec) - date = date.replace(tzinfo=date.tzinfo or tzlocal.get_localzone()) - l = list_get( - [i for i, r in enumerate(self.history) if r.timestamp <= date], - -1, - -1, - ) - r = list_get( - [i for i, r in enumerate(self.history) if r.timestamp >= date], - 0, - len(self.history), - ) - print(spec, l, r) - assert ( - 0 <= r - l <= 1 - ), "can not index with date if multiple revision have the same timestamp" - return (l + r) / 2.0 - except ValueError: - return None - - def find_by_number(self, _spec: str) -> Revision: - """Returns revision by relative number. - - 0 is the newest, - 1 is the next older, - 2 is the even next older, - and so on ... - - Raises IndexError or ValueError if no revision is found. - """ - spec = int(_spec) - if spec < 0: - raise KeyError("Integer revisions must be positive") - return self.history[-spec - 1] - - def find_by_tag(self, spec: str) -> Revision: - """Returns the latest revision matching a given tag. - - Raises IndexError or ValueError if no revision is found. - """ - if spec in ["last", "latest"]: - return self.history[-1] - if spec == "first": - return self.history[0] - raise ValueError() - - def find_by_uuid(self, spec: str) -> Revision: - """Returns revision matched by UUID. - - Raises IndexError if no revision is found. - """ - try: - return self._by_uuid[spec] - except KeyError: - raise IndexError() - - def find_by_function(self, spec: str) -> Revision: - m = re.fullmatch(r"(\w+)\(.+\)", spec) - if m and m.group(1) in ["first", "last"]: - return self.find_revisions(m.group(0))[0] - raise ValueError() - - def find(self, spec: str) -> Revision: - """Flexible revision search. - - Locates a revision by relative number, by tag, or by uuid. 
- - """ - spec = spec.strip() - if spec == "" or not self.history: - raise KeyError(spec) - - for find in ( - self.find_by_number, - self.find_by_uuid, - self.find_by_tag, - self.find_by_function, - ): - try: - return find(spec) - except (ValueError, IndexError): - pass - self.log.warning("find-rev-not-found", spec=spec) - raise KeyError(spec) - - ################### - # Syncing Revisions - - @locked(target=".backup", mode="exclusive") - async def push_metadata(self, peers, taskid: str) -> int: - grouped = defaultdict(list) - for r in self.clean_history: - if r.pending_changes: - grouped[r.server].append(r) - self.log.info( - "push-start", changes=sum(len(l) for l in grouped.values()) - ) - async with APIClientManager(peers, taskid, self.log) as apis: - errors = await asyncio.gather( - *[ - self._push_metadata(apis[server], grouped[server]) - for server in apis - ] - ) - self.log.info("push-end", errors=sum(errors)) - return sum(errors) - - async def _push_metadata( - self, api: APIClient, revs: List[Revision] - ) -> bool: - purge_required = False - error = False - for r in revs: - log = self.log.bind( - server=r.server, - rev_uuid=r.uuid, - ) - log.debug( - "push-updating-tags", - old_tags=r.orig_tags, - new_tags=r.tags, - ) - try: - await api.put_tags(r, autoremove=True) - if r.tags: - r.orig_tags = r.tags - r.write_info() - else: - r.remove(force=True) - purge_required = True - except ClientResponseError: - log.warning("push-client-error", exc_style="short") - error = True - except ClientConnectionError: - log.warning("push-connection-error", exc_style="short") - error = True - except ClientError: - log.exception("push-error") - error = True - - if purge_required: - log = self.log.bind(server=api.server_name) - log.debug("push-purging-remote") - try: - await api.run_purge(self.name) - except ClientResponseError: - log.warning("push-purge-client-error", exc_style="short") - error = True - except ClientConnectionError: - log.warning("push-purge-connection-error", exc_style="short") - error = True - except ClientError: - log.error("push-purge-error") - error = True - return error - - @locked(target=".backup", mode="exclusive") - async def pull_metadata(self, peers: dict, taskid: str) -> int: - async def remove_dead_peer(): - for r in list(self.history): - if r.server and r.server not in peers: - self.log.info( - "pull-removing-dead-peer", - rev_uuid=r.uuid, - server=r.server, - ) - r.remove(force=True) - return False - - self.log.info("pull-start") - async with APIClientManager(peers, taskid, self.log) as apis: - errors = await asyncio.gather( - remove_dead_peer(), - *[self._pull_metadata(apis[server]) for server in apis], - ) - self.log.info("pull-end", errors=sum(errors)) - return sum(errors) - - async def _pull_metadata(self, api: APIClient) -> bool: - error = False - log = self.log.bind(server=api.server_name) - try: - await api.touch_backup(self.name) - remote_revs = await api.get_revs(self) - log.debug("pull-found-revs", revs=len(remote_revs)) - except ClientResponseError as e: - if e.status in [ - HTTPNotFound.status_code, - HTTPForbidden.status_code, - ]: - log.debug("pull-not-found") - else: - log.warning("pull-client-error", exc_style="short") - error = True - remote_revs = [] - except ClientConnectionError: - log.warning("pull-connection-error", exc_style="short") - return True - except ClientError: - log.exception("pull-error") - error = True - remote_revs = [] - - local_uuids = { - r.uuid for r in self.history if r.server == api.server_name - } - remote_uuids = {r.uuid for r 
in remote_revs} - for uuid in local_uuids - remote_uuids: - log.warning("pull-removing-unknown-rev", rev_uuid=uuid) - self.find_by_uuid(uuid).remove(force=True) - - for r in remote_revs: - if r.uuid in local_uuids: - if r.to_dict() == self.find_by_uuid(r.uuid).to_dict(): - continue - log.debug("pull-updating-rev", rev_uid=r.uuid) - else: - log.debug("pull-new-rev", rev_uid=r.uuid) - r.write_info() - - return error diff --git a/src/backy/rbd/revision.py b/src/backy/rbd/revision.py new file mode 100644 index 00000000..efd46d16 --- /dev/null +++ b/src/backy/rbd/revision.py @@ -0,0 +1,187 @@ +import datetime +from enum import Enum +from pathlib import Path +from typing import IO, TYPE_CHECKING, Literal, Optional + +import shortuuid +import yaml +from structlog.stdlib import BoundLogger + +from ..revision import Revision +from . import utils +from .backends import select_backend +from .utils import SafeFile + +if TYPE_CHECKING: + from .backends import BackyBackend + from .backup import Backup + + +def filter_schedule_tags(tags): + return {t for t in tags if not t.startswith(TAG_MANUAL_PREFIX)} + + +def filter_manual_tags(tags): + return {t for t in tags if t.startswith(TAG_MANUAL_PREFIX)} + + +class RbdRevision(Revision): + backup: "Backup" + uuid: str + timestamp: datetime.datetime + stats: dict + tags: set[str] + orig_tags: set[str] + trust: Trust = Trust.TRUSTED + backend_type: Literal["cowfile", "chunked"] = "chunked" + server: str = "" + log: BoundLogger + + def __init__( + self, + backup: "Backup", + log: BoundLogger, + uuid: Optional[str] = None, + timestamp: Optional[datetime.datetime] = None, + ) -> None: + self.backup = backup + self.uuid = uuid if uuid else shortuuid.uuid() + self.timestamp = timestamp if timestamp else utils.now() + self.stats = {"bytes_written": 0} + self.tags = set() + self.orig_tags = set() + self.log = log.bind(revision_uuid=self.uuid, subsystem="revision") + + @classmethod + def create( + cls, + backup: "Backup", + tags: set[str], + log: BoundLogger, + *, + uuid: Optional[str] = None, + ) -> "Revision": + r = Revision(backup, log, uuid) + r.tags = tags + r.backend_type = backup.default_backend_type + return r + + @property + def backend(self) -> "BackyBackend": + return select_backend(self.backend_type)(self, self.log) + + @classmethod + def load(cls, file: Path, backup: "Backup", log: BoundLogger) -> "Revision": + with file.open(encoding="utf-8") as f: + metadata = yaml.safe_load(f) + r = cls.from_dict(metadata, backup, log) + return r + + @classmethod + def from_dict(cls, metadata, backup, log): + ts = metadata["timestamp"] + if isinstance(ts, str): + ts = datetime.datetime.fromisoformat(ts) + assert ts.tzinfo == datetime.timezone.utc + r = Revision(backup, log, uuid=metadata["uuid"], timestamp=ts) + r.stats = metadata.get("stats", {}) + r.tags = set(metadata.get("tags", [])) + r.orig_tags = set(metadata.get("orig_tags", [])) + r.server = metadata.get("server", "") + # Assume trusted by default to support migration + r.trust = Trust(metadata.get("trust", Trust.TRUSTED.value)) + # If the metadata does not show the backend type, then it's cowfile. 
+ r.backend_type = metadata.get("backend_type", "cowfile") + return r + + @property + def filename(self) -> Path: + """Full pathname of the image file.""" + return self.backup.path / self.uuid + + @property + def info_filename(self) -> Path: + """Full pathname of the metadata file.""" + return self.filename.with_suffix(self.filename.suffix + ".rev") + + def materialize(self) -> None: + self.write_info() + self.writable() + + def write_info(self) -> None: + self.log.debug("writing-info", tags=", ".join(self.tags)) + with SafeFile(self.info_filename, encoding="utf-8") as f: + f.open_new("wb") + f.write("# Please use the `backy tags` subcommand to edit tags\n") + yaml.safe_dump(self.to_dict(), f) + + def to_dict(self) -> dict: + return { + "uuid": self.uuid, + "backend_type": self.backend_type, + "timestamp": self.timestamp, + "parent": getattr( + self.get_parent(), "uuid", "" + ), # compatibility with older versions + "stats": self.stats, + "trust": self.trust.value, + "tags": list(self.tags), + "orig_tags": list(self.orig_tags), + "server": self.server, + } + + @property + def pending_changes(self): + return self.server and self.tags != self.orig_tags + + def distrust(self) -> None: + assert not self.server + self.log.info("distrusted") + self.trust = Trust.DISTRUSTED + + def verify(self) -> None: + assert not self.server + self.log.info("verified") + self.trust = Trust.VERIFIED + + def remove(self, force=False) -> None: + self.log.info("remove") + if not force and self.server: + self.log.debug("remove-remote", server=self.server) + self.tags = set() + self.write_info() + else: + for filename in self.filename.parent.glob(self.filename.name + "*"): + if filename.exists(): + self.log.debug("remove-start", filename=filename) + filename.unlink() + self.log.debug("remove-end", filename=filename) + + if self in self.backup.history: + self.backup.history.remove(self) + del self.backup._by_uuid[self.uuid] + + def writable(self) -> None: + if self.filename.exists(): + self.filename.chmod(0o640) + self.info_filename.chmod(0o640) + + def readonly(self) -> None: + if self.filename.exists(): + self.filename.chmod(0o440) + self.info_filename.chmod(0o440) + + def get_parent(self, ignore_trust=False) -> Optional["Revision"]: + """defaults to last rev if not in history""" + prev = None + for r in self.backup.history: + if r.backend_type != self.backend_type: + continue + if not ignore_trust and r.trust == Trust.DISTRUSTED: + continue + if r.server != self.server: + continue + if r.uuid == self.uuid: + break + prev = r + return prev diff --git a/src/backy/revision.py b/src/backy/revision.py index 9f67fbdf..10c56772 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -41,7 +41,6 @@ class Revision(object): tags: set[str] orig_tags: set[str] trust: Trust = Trust.TRUSTED - backend_type: Literal["cowfile", "chunked"] = "chunked" server: str = "" log: BoundLogger @@ -71,13 +70,8 @@ def create( ) -> "Revision": r = Revision(backup, log, uuid) r.tags = tags - r.backend_type = backup.default_backend_type return r - @property - def backend(self) -> "BackyBackend": - return select_backend(self.backend_type)(self, self.log) - @classmethod def load(cls, file: Path, backup: "Backup", log: BoundLogger) -> "Revision": with file.open(encoding="utf-8") as f: @@ -126,7 +120,6 @@ def write_info(self) -> None: def to_dict(self) -> dict: return { "uuid": self.uuid, - "backend_type": self.backend_type, "timestamp": self.timestamp, "parent": getattr( self.get_parent(), "uuid", "" @@ -183,8 +176,6 @@ def 
get_parent(self, ignore_trust=False) -> Optional["Revision"]: """defaults to last rev if not in history""" prev = None for r in self.backup.history: - if r.backend_type != self.backend_type: - continue if not ignore_trust and r.trust == Trust.DISTRUSTED: continue if r.server != self.server: From 9cb68cfca8e1b1712d298f8683f3e80adc2a2d16 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Tue, 25 Jun 2024 17:02:46 +0200 Subject: [PATCH 05/25] snapshot: try to clean up the daemon sub-package --- src/backy/backup.py | 99 ++++++++++-- src/backy/cli/client.py | 187 ++++++---------------- src/backy/daemon/__init__.py | 72 +++++++-- src/backy/daemon/api.py | 205 +++++++++++++++++++++---- src/backy/daemon/scheduler.py | 130 +++++----------- src/backy/logging.py | 16 +- src/backy/rbd/backends/chunked/file.py | 4 +- src/backy/rbd/backup.py | 62 ++------ 8 files changed, 428 insertions(+), 347 deletions(-) diff --git a/src/backy/backup.py b/src/backy/backup.py index 0bea1e4b..178a76c5 100644 --- a/src/backy/backup.py +++ b/src/backy/backup.py @@ -1,8 +1,9 @@ import datetime +import fcntl import re from math import ceil, floor from pathlib import Path -from typing import IO, List, Literal, Optional, Type +from typing import List, Literal, Optional, TypedDict import tzlocal import yaml @@ -21,8 +22,24 @@ from .schedule import Schedule +class StatusDict(TypedDict): + job: str + sla: str + sla_overdue: int + status: str + last_time: Optional[datetime.datetime] + last_tags: Optional[str] + last_duration: Optional[float] + next_time: Optional[datetime.datetime] + next_tags: Optional[str] + manual_tags: str + problem_reports: List[str] + unsynced_revs: int + local_revs: int + + class Backup(object): - """A backup of a VM.""" + """A generic backup of some data source.""" path: Path config: dict @@ -51,6 +68,56 @@ def __init__(self, path: Path, log: BoundLogger): self.schedule = Schedule() self.schedule.configure(self.config["schedule"]) + @property + def problem_reports(self) -> list[str]: + return [] + + # I placed this on the class because this is usually used in conjunction + # with the class and improves cohesiveness and readability IMHO. 
+ @staticmethod + def locked(target=None, mode=None): + if mode == "shared": + mode = fcntl.LOCK_SH + elif mode == "exclusive": + mode = fcntl.LOCK_EX | fcntl.LOCK_NB + else: + raise ValueError("Unknown lock mode '{}'".format(mode)) + + def wrap(f): + def locked_function(self, *args, skip_lock=False, **kw): + if skip_lock: + return f(self, *args, **kw) + if target in self._lock_fds: + raise RuntimeError("Bug: Locking is not re-entrant.") + target_path = self.path / target + if not target_path.exists(): + target_path.touch() + self._lock_fds[target] = target_path.open() + try: + fcntl.flock(self._lock_fds[target], mode) + except BlockingIOError: + self.log.warning( + "lock-no-exclusive", + _fmt_msg="Failed to get exclusive lock for '{function}'.", + function=f.__name__, + ) + raise + else: + try: + return f(self, *args, **kw) + finally: + fcntl.flock(self._lock_fds[target], fcntl.LOCK_UN) + finally: + self._lock_fds[target].close() + del self._lock_fds[target] + + locked_function.__name__ = "locked({}, {})".format( + f.__name__, target + ) + return locked_function + + return wrap + @property def name(self) -> str: return self.path.name @@ -117,8 +184,10 @@ def validate_tags(self, tags): if missing_tags: self.log.error( "unknown-tags", - _fmt_msg="The following tags are missing from the schedule: {unknown_tags}\n" - "Check the config file, add the `manual:` prefix or disable tag validation (-f)", + _fmt_msg="The following tags are missing from the schedule: " + "{unknown_tags}\n" + "Check the config file, add the `manual:` prefix or disable " + "tag validation (-f)", unknown_tags=", ".join(missing_tags), ) raise RuntimeError("Unknown tags") @@ -129,7 +198,8 @@ def warn_pending_changes(self, revs: Optional[List[Revision]] = None): if pending: self.log.warning( "pending-changes", - _fmt_msg="Synchronize with remote server (backy push) or risk loosing changes", + _fmt_msg="Synchronize with remote server (backy push) or " + "risk loosing changes", revisions=",".join(r.uuid for r in pending), ) @@ -139,9 +209,11 @@ def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): if remote: self.log.error( "remote-revs-disallowed", - _fmt_msg="Can not modify trust state of remote revisions locally.\n" + _fmt_msg="Can not modify trust state of remote revisions " + "locally.\n" "Either include a filter to exclude them (local)\n" - "or edit them on the origin server and pull the changes (backy pull)", + "or edit them on the origin server and pull the changes " + "(backy pull)", revisions=",".join(r.uuid for r in remote), ) raise RuntimeError("Remote revs disallowed") @@ -326,7 +398,7 @@ def index_by_date(self, spec: str) -> Optional[float]: try: date = datetime.datetime.fromisoformat(spec) date = date.replace(tzinfo=date.tzinfo or tzlocal.get_localzone()) - l = list_get( + L = list_get( [i for i, r in enumerate(self.history) if r.timestamp <= date], -1, -1, @@ -336,11 +408,12 @@ def index_by_date(self, spec: str) -> Optional[float]: 0, len(self.history), ) - print(spec, l, r) - assert ( - 0 <= r - l <= 1 - ), "can not index with date if multiple revision have the same timestamp" - return (l + r) / 2.0 + print(spec, L, r) + assert 0 <= r - L <= 1, ( + "can not index with date if multiple revision have the same " + "timestamp" + ) + return (L + r) / 2.0 except ValueError: return None diff --git a/src/backy/cli/client.py b/src/backy/cli/client.py index e35dacdb..2ace349f 100644 --- a/src/backy/cli/client.py +++ b/src/backy/cli/client.py @@ -20,155 +20,54 @@ from backy.daemon import BackyDaemon 
-class APIClientManager: - connector: TCPConnector - peers: dict[str, dict] - clients: dict[str, "APIClient"] - taskid: str - log: BoundLogger - - def __init__(self, peers: Dict[str, dict], taskid: str, log: BoundLogger): - self.connector = TCPConnector() - self.peers = peers - self.clients = dict() - self.taskid = taskid - self.log = log.bind(subsystem="APIClientManager") - - def __getitem__(self, name: str) -> "APIClient": - if name and name not in self.clients: - self.clients[name] = APIClient.from_conf( - name, self.peers[name], self.taskid, self.log, self.connector +# XXX this is partially duplicated in the daemon +def status(self, filter_re: Optional[Pattern[str]] = None) -> List[StatusDict]: + """Collects status information for all jobs.""" + # XXX with a database backend, we can evaluate this in live actually + # so this should move to the CLI client + result: List["BackyDaemon.StatusDict"] = [] + for job in list(self.jobs.values()): + if filter_re and not filter_re.search(job.name): + continue + job.backup.scan() + manual_tags = set() + unsynced_revs = 0 + history = job.backup.clean_history + for rev in history: + manual_tags |= filter_manual_tags(rev.tags) + if rev.pending_changes: + unsynced_revs += 1 + result.append( + dict( + job=job.name, + sla="OK" if job.sla else "TOO OLD", + sla_overdue=job.sla_overdue, + status=job.status, + last_time=history[-1].timestamp if history else None, + last_tags=( + ",".join(job.schedule.sorted_tags(history[-1].tags)) + if history + else None + ), + last_duration=( + history[-1].stats.get("duration", 0) if history else None + ), + next_time=job.next_time, + next_tags=( + ",".join(job.schedule.sorted_tags(job.next_tags)) + if job.next_tags + else None + ), + manual_tags=", ".join(manual_tags), + quarantine_reports=len(job.backup.quarantine.report_ids), + unsynced_revs=unsynced_revs, + local_revs=len(job.backup.get_history(clean=True, local=True)), ) - return self.clients[name] - - def __iter__(self) -> Iterator[str]: - return iter(self.peers) - - async def close(self) -> None: - for c in self.clients.values(): - await c.close() - await self.connector.close() - - async def __aenter__(self) -> "APIClientManager": - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close() - - -class APIClient: - log: BoundLogger - server_name: str - session: aiohttp.ClientSession - - def __init__( - self, - server_name: str, - url: str, - token: str, - taskid: str, - log, - connector=None, - ): - assert get_running_loop().is_running() - self.log = log.bind(subsystem="APIClient") - self.server_name = server_name - self.session = aiohttp.ClientSession( - url, - headers={hdrs.AUTHORIZATION: "Bearer " + token, "taskid": taskid}, - raise_for_status=True, - timeout=ClientTimeout(30, connect=10), - connector=connector, - connector_owner=connector is None, - ) - - @classmethod - def from_conf(cls, server_name, conf, *args, **kwargs): - return cls( - server_name, - conf["url"], - conf["token"], - *args, - **kwargs, ) - - async def fetch_status( - self, filter: str = "" - ) -> List["BackyDaemon.StatusDict"]: - async with self.session.get( - "/v1/status", params={"filter": filter} - ) as response: - jobs = await response.json() - for job in jobs: - if job["last_time"]: - job["last_time"] = datetime.datetime.fromisoformat( - job["last_time"] - ) - if job["next_time"]: - job["next_time"] = datetime.datetime.fromisoformat( - job["next_time"] - ) - return jobs - - async def reload_daemon(self): - async with self.session.post(f"/v1/reload") as 
response: - return - - async def get_jobs(self) -> List[dict]: - async with self.session.get("/v1/jobs") as response: - return await response.json() - - async def run_job(self, name: str): - async with self.session.post(f"/v1/jobs/{name}/run") as response: - return - - async def list_backups(self) -> List[str]: - async with self.session.get("/v1/backups") as response: - return await response.json() - - async def run_purge(self, name: str): - async with self.session.post(f"/v1/backups/{name}/purge") as response: - return - - async def touch_backup(self, name: str): - async with self.session.post(f"/v1/backups/{name}/touch") as response: - return - - async def get_revs( - self, backup: "backy.backup.Backup", only_clean: bool = True - ) -> List[Revision]: - async with self.session.get( - f"/v1/backups/{backup.name}/revs", - params={"only_clean": int(only_clean)}, - ) as response: - json = await response.json() - revs = [Revision.from_dict(r, backup, self.log) for r in json] - for r in revs: - r.backend_type = "" - r.orig_tags = r.tags - r.server = self.server_name - return revs - - async def put_tags(self, rev: Revision, autoremove: bool = False): - async with self.session.put( - f"/v1/backups/{rev.backup.name}/revs/{rev.uuid}/tags", - json={"old_tags": list(rev.orig_tags), "new_tags": list(rev.tags)}, - params={"autoremove": int(autoremove)}, - ) as response: - return - - async def close(self): - await self.session.close() - - async def __aenter__(self) -> "APIClient": - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close() + return result class CLIClient: - api: APIClient log: BoundLogger def __init__(self, apiclient, log): diff --git a/src/backy/daemon/__init__.py b/src/backy/daemon/__init__.py index 9ce07c86..e315b58a 100644 --- a/src/backy/daemon/__init__.py +++ b/src/backy/daemon/__init__.py @@ -2,8 +2,6 @@ import argparse import asyncio -import datetime -import errno import fcntl import os import os.path as p @@ -11,28 +9,22 @@ import sys import time from pathlib import Path -from typing import IO, List, Literal, Optional, Pattern, TypedDict +from typing import IO, List, Optional, Pattern import aiofiles.os as aos import aioshutil -import humanize import structlog -import tzlocal import yaml -from aiohttp import ClientConnectionError -from rich import print as rprint -from rich.table import Column, Table from structlog.stdlib import BoundLogger -from backy.utils import format_datetime_local, generate_taskid +from backy import logging +from backy.backup import Backup, StatusDict +from backy.revision import filter_manual_tags +from backy.schedule import Schedule +from backy.utils import has_recent_changes, is_dir_no_symlink -from . 
import logging from .api import BackyAPI -from .backup import Backup -from .revision import filter_manual_tags -from .schedule import Schedule from .scheduler import Job -from .utils import has_recent_changes, is_dir_no_symlink daemon: "BackyDaemon" @@ -335,6 +327,58 @@ async def purge_pending_backups(self): self.log.exception("purge-pending") await asyncio.sleep(24 * 60 * 60) + # XXX this is duplicated in the client + def status( + self, filter_re: Optional[Pattern[str]] = None + ) -> List[StatusDict]: + """Collects status information for all jobs.""" + # XXX with a database backend, we can evaluate this in live actually + # so this should move to the CLI client + result: List[StatusDict] = [] + for job in list(self.jobs.values()): + if filter_re and not filter_re.search(job.name): + continue + job.backup.scan() + manual_tags = set() + unsynced_revs = 0 + history = job.backup.clean_history + for rev in history: + manual_tags |= filter_manual_tags(rev.tags) + if rev.pending_changes: + unsynced_revs += 1 + result.append( + dict( + job=job.name, + sla="OK" if job.sla else "TOO OLD", + sla_overdue=job.sla_overdue, + status=job.status, + last_time=history[-1].timestamp if history else None, + last_tags=( + ",".join(job.schedule.sorted_tags(history[-1].tags)) + if history + else None + ), + last_duration=( + history[-1].stats.get("duration", 0) + if history + else None + ), + next_time=job.next_time, + next_tags=( + ",".join(job.schedule.sorted_tags(job.next_tags)) + if job.next_tags + else None + ), + manual_tags=", ".join(manual_tags), + problem_reports=job.backup.problem_reports, + unsynced_revs=unsynced_revs, + local_revs=len( + job.backup.get_history(clean=True, local=True) + ), + ) + ) + return result + def main(): parser = argparse.ArgumentParser( diff --git a/src/backy/daemon/api.py b/src/backy/daemon/api.py index d59ee6ff..efb0fa67 100644 --- a/src/backy/daemon/api.py +++ b/src/backy/daemon/api.py @@ -1,10 +1,11 @@ import datetime import re +from asyncio import get_running_loop from json import JSONEncoder -from pathlib import Path -from typing import TYPE_CHECKING, Any, List, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple -from aiohttp import hdrs, web +import aiohttp +from aiohttp import ClientTimeout, TCPConnector, hdrs, web from aiohttp.web_exceptions import ( HTTPAccepted, HTTPBadRequest, @@ -18,11 +19,13 @@ from aiohttp.web_runner import AppRunner, TCPSite from structlog.stdlib import BoundLogger -from backy.backup import Backup +import backy.backup +from backy.backup import Backup, StatusDict from backy.revision import Revision -from backy.scheduler import Job from backy.utils import generate_taskid +from .scheduler import Job + if TYPE_CHECKING: from backy.daemon import BackyDaemon @@ -37,6 +40,12 @@ def default(self, o: Any) -> Any: super().default(o) +def to_json(response: Any) -> aiohttp.web.StreamResponse: + if response is None: + raise web.HTTPNoContent() + return web.json_response(response, dumps=BackyJSONEncoder().encode) + + class BackyAPI: daemon: "BackyDaemon" sites: dict[Tuple[str, int], TCPSite] @@ -49,7 +58,7 @@ def __init__(self, daemon, log): self.daemon = daemon self.sites = {} self.app = web.Application( - middlewares=[self.log_conn, self.require_auth, self.to_json] + middlewares=[self.log_conn, self.require_auth] ) self.app.add_routes( [ @@ -136,35 +145,30 @@ async def require_auth(self, request: web.Request, handler): request["log"] = request["log"].bind(job_name="~" + client) return await handler(request) - @middleware - async def 
to_json(self, request: web.Request, handler): - resp = await handler(request) - if isinstance(resp, web.Response): - return resp - elif resp is None: - raise web.HTTPNoContent() - else: - return web.json_response(resp, dumps=BackyJSONEncoder().encode) - async def get_status( self, request: web.Request - ) -> List["BackyDaemon.StatusDict"]: + ) -> aiohttp.web.StreamResponse: filter = request.query.get("filter", None) request["log"].info("get-status", filter=filter) if filter: - filter = re.compile(filter) - return self.daemon.status(filter) + filter_re = re.compile(filter) + return to_json(self.daemon.status(filter_re)) async def reload_daemon(self, request: web.Request): request["log"].info("reload-daemon") self.daemon.reload() + return to_json(None) - async def get_jobs(self, request: web.Request) -> List[Job]: + async def get_jobs(self, request: web.Request): request["log"].info("get-jobs") - return list(self.daemon.jobs.values()) + return to_json(list(self.daemon.jobs.values())) async def get_job(self, request: web.Request) -> Job: name = request.match_info.get("job_name") + if name is None: + request["log"].info("empty-job") + raise HTTPNotFound() + request["log"].info("get-job", name=name) try: return self.daemon.jobs[name] @@ -178,9 +182,9 @@ async def run_job(self, request: web.Request): j.run_immediately.set() raise HTTPAccepted() - async def list_backups(self, request: web.Request) -> List[str]: + async def list_backups(self, request: web.Request): request["log"].info("list-backups") - return list(self.daemon.dead_backups.keys()) + return to_json(list(self.daemon.dead_backups.keys())) async def get_backup( self, request: web.Request, allow_active: bool @@ -207,13 +211,16 @@ async def touch_backup(self, request: web.Request): backup = await self.get_backup(request, True) request["log"].info("touch-backup", name=backup.name) backup.touch() + raise web.HTTPNoContent() - async def get_revs(self, request: web.Request) -> List[Revision]: + async def get_revs(self, request: web.Request): backup = await self.get_backup(request, True) request["log"].info("get-revs", name=backup.name) backup.scan() - return backup.get_history( - local=True, clean=request.query.get("only_clean", "") == "1" + return to_json( + backup.get_history( + local=True, clean=request.query.get("only_clean", "") == "1" + ) ) async def put_tags(self, request: web.Request): @@ -252,3 +259,149 @@ async def put_tags(self, request: web.Request): except BlockingIOError: request["log"].info("put-tags-locked") raise HTTPServiceUnavailable() + raise web.HTTPNoContent() + + +class ClientManager: + connector: TCPConnector + peers: dict[str, dict] + clients: dict[str, "Client"] + taskid: str + log: BoundLogger + + def __init__(self, peers: Dict[str, dict], taskid: str, log: BoundLogger): + self.connector = TCPConnector() + self.peers = peers + self.clients = dict() + self.taskid = taskid + self.log = log.bind(subsystem="ClientManager") + + def __getitem__(self, name: str) -> "Client": + if name and name not in self.clients: + self.clients[name] = Client.from_conf( + name, self.peers[name], self.taskid, self.log, self.connector + ) + return self.clients[name] + + def __iter__(self) -> Iterator[str]: + return iter(self.peers) + + async def close(self) -> None: + for c in self.clients.values(): + await c.close() + await self.connector.close() + + async def __aenter__(self) -> "ClientManager": + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + + +class Client: + log: BoundLogger + 
server_name: str + session: aiohttp.ClientSession + + def __init__( + self, + server_name: str, + url: str, + token: str, + taskid: str, + log, + connector=None, + ): + assert get_running_loop().is_running() + self.log = log.bind(subsystem="APIClient") + self.server_name = server_name + self.session = aiohttp.ClientSession( + url, + headers={hdrs.AUTHORIZATION: "Bearer " + token, "taskid": taskid}, + raise_for_status=True, + timeout=ClientTimeout(30, connect=10), + connector=connector, + connector_owner=connector is None, + ) + + @classmethod + def from_conf(cls, server_name, conf, *args, **kwargs): + return cls( + server_name, + conf["url"], + conf["token"], + *args, + **kwargs, + ) + + async def fetch_status(self, filter: str = "") -> List[StatusDict]: + async with self.session.get( + "/v1/status", params={"filter": filter} + ) as response: + jobs = await response.json() + for job in jobs: + if job["last_time"]: + job["last_time"] = datetime.datetime.fromisoformat( + job["last_time"] + ) + if job["next_time"]: + job["next_time"] = datetime.datetime.fromisoformat( + job["next_time"] + ) + return jobs + + async def reload_daemon(self): + async with self.session.post("/v1/reload"): + return + + async def get_jobs(self) -> List[dict]: + async with self.session.get("/v1/jobs") as response: + return await response.json() + + async def run_job(self, name: str): + async with self.session.post(f"/v1/jobs/{name}/run"): + return + + async def list_backups(self) -> List[str]: + async with self.session.get("/v1/backups") as response: + return await response.json() + + async def run_purge(self, name: str): + async with self.session.post(f"/v1/backups/{name}/purge"): + return + + async def touch_backup(self, name: str): + async with self.session.post(f"/v1/backups/{name}/touch"): + return + + async def get_revs( + self, backup: "backy.backup.Backup", only_clean: bool = True + ) -> List[Revision]: + async with self.session.get( + f"/v1/backups/{backup.name}/revs", + params={"only_clean": int(only_clean)}, + ) as response: + json = await response.json() + revs = [Revision.from_dict(r, backup, self.log) for r in json] + for r in revs: + r.backend_type = "" + r.orig_tags = r.tags + r.server = self.server_name + return revs + + async def put_tags(self, rev: Revision, autoremove: bool = False): + async with self.session.put( + f"/v1/backups/{rev.backup.name}/revs/{rev.uuid}/tags", + json={"old_tags": list(rev.orig_tags), "new_tags": list(rev.tags)}, + params={"autoremove": int(autoremove)}, + ): + return + + async def close(self): + await self.session.close() + + async def __aenter__(self) -> "Client": + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index bc7a83c1..a7157137 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -5,27 +5,30 @@ import os import random import subprocess +from collections import defaultdict from datetime import timedelta from pathlib import Path -from typing import TYPE_CHECKING, Literal, Optional, Set +from typing import TYPE_CHECKING, List, Literal, Optional, Set import yaml -from aiohttp import ClientError +from aiohttp import ClientConnectionError, ClientError, ClientResponseError +from aiohttp.web_exceptions import HTTPForbidden, HTTPNotFound from structlog.stdlib import BoundLogger import backy.utils - -from .backup import Backup -from .client import APIClientManager -from .ext_deps import BACKY_CMD -from .schedule import 
Schedule -from .utils import ( +from backy.backup import Backup +from backy.ext_deps import BACKY_CMD +from backy.revision import Revision +from backy.schedule import Schedule +from backy.utils import ( SafeFile, format_datetime_local, generate_taskid, time_or_event, ) +from .api import Client, ClientManager + if TYPE_CHECKING: from backy.daemon import BackyDaemon @@ -141,20 +144,25 @@ async def _wait_for_deadline(self) -> Optional[Literal[True]]: async def _wait_for_leader(self, next_time: datetime.datetime) -> bool: api = None try: - api = APIClientManager(self.daemon.peers, self.taskid, self.log) + api = ClientManager(self.daemon.peers, self.taskid, self.log) statuses = await asyncio.gather( - *[api[server].fetch_status(f"^{self.name}$") for server in api], + *[ + api[server].fetch_status(f"^{self.name}$") + for server in api + ], return_exceptions=True, ) leader = None leader_revs = len(self.backup.get_history(clean=True, local=True)) - leader_status: "BackyDaemon.StatusDict" + leader_status: "backy.backup.StatusDict" self.log.info("local-revs", local_revs=leader_revs) for server, status in zip(api, statuses): log = self.log.bind(server=server) if isinstance(status, BaseException): log.info( - "server-unavailable", exc_info=status, exc_style="short" + "server-unavailable", + exc_info=status, + exc_style="short", ) continue num_remote_revs = status[0]["local_revs"] @@ -226,7 +234,9 @@ async def run_forever(self) -> None: self.log.debug("loop-started") while True: self.taskid = generate_taskid() - self.log = self.log.bind(job_name=self.name, sub_taskid=self.taskid) + self.log = self.log.bind( + job_name=self.name, sub_taskid=self.taskid + ) self.backup = Backup(self.path, self.log) @@ -305,74 +315,6 @@ async def run_forever(self) -> None: self.backoff = 0 self.update_status("finished") - async def pull_metadata(self) -> None: - self.log.info("pull-metadata-started") - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - self.path, - "-l", - self.logfile, - "pull", - "-c", - self.daemon.config_file, - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "pull-metadata-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - except asyncio.CancelledError: - self.log.warning("pull-metadata-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise - - async def push_metadata(self) -> None: - self.log.info("push-metadata-started") - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - self.path, - "-l", - self.logfile, - "push", - "-c", - self.daemon.config_file, - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "push-metadata-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - except asyncio.CancelledError: - self.log.warning("push-metadata-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise - async def run_backup(self, tags: Set[str]) -> None: self.log.info("backup-started", tags=", ".join(tags)) proc = await asyncio.create_subprocess_exec( @@ -550,16 +492,16 @@ def stop(self) -> None: self._task = None self.update_status("") - @locked(target=".backup", mode="exclusive") + 
@Backup.locked(target=".backup", mode="exclusive") async def push_metadata(self, peers, taskid: str) -> int: grouped = defaultdict(list) - for r in self.clean_history: + for r in self.backup.clean_history: if r.pending_changes: grouped[r.server].append(r) self.log.info( - "push-start", changes=sum(len(l) for l in grouped.values()) + "push-start", changes=sum(len(L) for L in grouped.values()) ) - async with APIClientManager(peers, taskid, self.log) as apis: + async with ClientManager(peers, taskid, self.log) as apis: errors = await asyncio.gather( *[ self._push_metadata(apis[server], grouped[server]) @@ -569,9 +511,7 @@ async def push_metadata(self, peers, taskid: str) -> int: self.log.info("push-end", errors=sum(errors)) return sum(errors) - async def _push_metadata( - self, api: APIClient, revs: List[Revision] - ) -> bool: + async def _push_metadata(self, api: Client, revs: List[Revision]) -> bool: purge_required = False error = False for r in revs: @@ -618,10 +558,10 @@ async def _push_metadata( error = True return error - @locked(target=".backup", mode="exclusive") + @Backup.locked(target=".backup", mode="exclusive") async def pull_metadata(self, peers: dict, taskid: str) -> int: async def remove_dead_peer(): - for r in list(self.history): + for r in list(self.backup.history): if r.server and r.server not in peers: self.log.info( "pull-removing-dead-peer", @@ -632,7 +572,7 @@ async def remove_dead_peer(): return False self.log.info("pull-start") - async with APIClientManager(peers, taskid, self.log) as apis: + async with ClientManager(peers, taskid, self.log) as apis: errors = await asyncio.gather( remove_dead_peer(), *[self._pull_metadata(apis[server]) for server in apis], @@ -640,7 +580,7 @@ async def remove_dead_peer(): self.log.info("pull-end", errors=sum(errors)) return sum(errors) - async def _pull_metadata(self, api: APIClient) -> bool: + async def _pull_metadata(self, api: Client) -> bool: error = False log = self.log.bind(server=api.server_name) try: @@ -666,16 +606,16 @@ async def _pull_metadata(self, api: APIClient) -> bool: remote_revs = [] local_uuids = { - r.uuid for r in self.history if r.server == api.server_name + r.uuid for r in self.backup.history if r.server == api.server_name } remote_uuids = {r.uuid for r in remote_revs} for uuid in local_uuids - remote_uuids: log.warning("pull-removing-unknown-rev", rev_uuid=uuid) - self.find_by_uuid(uuid).remove(force=True) + self.backup.find_by_uuid(uuid).remove(force=True) for r in remote_revs: if r.uuid in local_uuids: - if r.to_dict() == self.find_by_uuid(r.uuid).to_dict(): + if r.to_dict() == self.backup.find_by_uuid(r.uuid).to_dict(): continue log.debug("pull-updating-rev", rev_uid=r.uuid) else: diff --git a/src/backy/logging.py b/src/backy/logging.py index 91bc7e7d..223a77db 100644 --- a/src/backy/logging.py +++ b/src/backy/logging.py @@ -118,7 +118,9 @@ def __getattr__(self, name): def prefix(prefix, line): - return "{}>\t".format(prefix) + line.replace("\n", "\n{}>\t".format(prefix)) + return "{}>\t".format(prefix) + line.replace( + "\n", "\n{}>\t".format(prefix) + ) class ConsoleFileRenderer: @@ -143,7 +145,9 @@ def __init__(self, min_level, pad_event=_EVENT_WIDTH): self.min_level = self.LEVELS.index(min_level.lower()) if colorama is None: print( - _MISSING.format(who=self.__class__.__name__, package="colorama") + _MISSING.format( + who=self.__class__.__name__, package="colorama" + ) ) if COLORIZED_TTY_OUTPUT: colorama.init() @@ -212,10 +216,13 @@ def write(line): level = event_dict.pop("level", None) if level is not 
None:
             write(
-                self._level_to_color[level] + level[0].upper() + RESET_ALL + " "
+                self._level_to_color[level]
+                + level[0].upper()
+                + RESET_ALL
+                + " "
             )
 
-        job_name = event_dict.pop("job_name", "")
+        job_name = event_dict.pop("job_name", "-")
         sub_taskid = event_dict.pop("sub_taskid", None)
         if sub_taskid:
             job_name += f"[{sub_taskid}]"
@@ -358,7 +365,6 @@ def init_logging(
     logfile: Optional[Path] = None,
     defaults: Optional[dict] = None,
 ):
-
     console_file_renderer = ConsoleFileRenderer(
         min_level="trace" if verbose else "info",
     )
diff --git a/src/backy/rbd/backends/chunked/file.py b/src/backy/rbd/backends/chunked/file.py
index f17e37eb..3cb50d76 100644
--- a/src/backy/rbd/backends/chunked/file.py
+++ b/src/backy/rbd/backends/chunked/file.py
@@ -162,7 +162,9 @@ def seek(self, offset: int, whence=io.SEEK_SET) -> int:
         elif whence == io.SEEK_CUR:
             position = position + offset
         else:
-            raise ValueError("`whence` does not support mode {}".format(whence))
+            raise ValueError(
+                "`whence` does not support mode {}".format(whence)
+            )
         if position < 0:
             raise ValueError("Can not seek before the beginning of a file.")
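The module-level `locked()` helper deleted in the next diff lives on as `Backup.locked` (see the src/backy/backup.py hunk earlier in this patch). Reduced to a self-contained sketch — assumed lock-file path and simplified error handling, not the decorator itself:

    import fcntl
    from contextlib import contextmanager
    from pathlib import Path

    @contextmanager
    def file_lock(path: Path, exclusive: bool = False):
        # Exclusive acquisition is non-blocking, matching Backup.locked:
        # a lock that is already held raises BlockingIOError instead of
        # queueing, so e.g. `gc` (exclusive on .purge) fails fast while a
        # backup (shared on .purge) is still running.
        mode = fcntl.LOCK_EX | fcntl.LOCK_NB if exclusive else fcntl.LOCK_SH
        path.touch(exist_ok=True)
        with path.open() as f:
            fcntl.flock(f, mode)
            try:
                yield
            finally:
                fcntl.flock(f, fcntl.LOCK_UN)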
diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/backup.py
index 6a002174..c7b63276 100644
--- a/src/backy/rbd/backup.py
+++ b/src/backy/rbd/backup.py
@@ -43,48 +43,6 @@ def __str__(self):
         return self.value
 
 
-def locked(target=None, mode=None):
-    if mode == "shared":
-        mode = fcntl.LOCK_SH
-    elif mode == "exclusive":
-        mode = fcntl.LOCK_EX | fcntl.LOCK_NB
-    else:
-        raise ValueError("Unknown lock mode '{}'".format(mode))
-
-    def wrap(f):
-        def locked_function(self, *args, skip_lock=False, **kw):
-            if skip_lock:
-                return f(self, *args, **kw)
-            if target in self._lock_fds:
-                raise RuntimeError("Bug: Locking is not re-entrant.")
-            target_path = self.path / target
-            if not target_path.exists():
-                target_path.touch()
-            self._lock_fds[target] = target_path.open()
-            try:
-                fcntl.flock(self._lock_fds[target], mode)
-            except BlockingIOError:
-                self.log.warning(
-                    "lock-no-exclusive",
-                    _fmt_msg="Failed to get exclusive lock for '{function}'.",
-                    function=f.__name__,
-                )
-                raise
-            else:
-                try:
-                    return f(self, *args, **kw)
-                finally:
-                    fcntl.flock(self._lock_fds[target], fcntl.LOCK_UN)
-            finally:
-                self._lock_fds[target].close()
-                del self._lock_fds[target]
-
-        locked_function.__name__ = "locked({}, {})".format(f.__name__, target)
-        return locked_function
-
-    return wrap
-
-
 class RbdBackup(Backup):
     """A backup of a VM.
 
@@ -121,11 +79,15 @@ def __init__(self, path: Path, log: BoundLogger):
 
         self.quarantine = QuarantineStore(self.path, self.log)
 
+    @property
+    def problem_reports(self):
+        return [f"{len(self.quarantine.report_ids)} quarantined blocks"]
+
     #################
     # Making backups
 
-    @locked(target=".backup", mode="exclusive")
-    @locked(target=".purge", mode="shared")
+    @Backup.locked(target=".backup", mode="exclusive")
+    @Backup.locked(target=".purge", mode="shared")
     def backup(self, revision: str) -> bool:
         self.path.joinpath("last").unlink(missing_ok=True)
         self.path.joinpath("last.rev").unlink(missing_ok=True)
@@ -176,14 +138,14 @@ def backup(self, revision: str) -> bool:
                     break
         return verified
 
-    @locked(target=".purge", mode="shared")
+    @Backup.locked(target=".purge", mode="shared")
     def verify(self, revision: str) -> None:
         revs = self.find_revisions(revision)
         self.prevent_remote_rev(revs)
         for r in revs:
             r.backend.verify()
 
-    @locked(target=".purge", mode="exclusive")
+    @Backup.locked(target=".purge", mode="exclusive")
     def gc(self) -> None:
         self.local_history[-1].backend.purge()
         self.clear_purge_pending()
@@ -226,7 +188,9 @@ def backy_extract_supported(self, file: IO) -> bool:
             return False
         try:
             version = subprocess.check_output(
-                [BACKY_EXTRACT, "--version"], encoding="utf-8", errors="replace"
+                [BACKY_EXTRACT, "--version"],
+                encoding="utf-8",
+                errors="replace",
             )
             if not version.startswith("backy-extract"):
                 log.debug("unknown-version")
@@ -253,7 +217,7 @@ def restore_backy_extract(self, rev: Revision, target: str) -> None:
                 f"backy-extract failed with return code {return_code}. Maybe try `--backend python`?"
             )
 
-    @locked(target=".purge", mode="shared")
+    @Backup.locked(target=".purge", mode="shared")
     def restore_file(self, source: IO, target_name: str) -> None:
         """Bulk-copy from open revision `source` to target file."""
         self.log.debug("restore-file", source=source.name, target=target_name)
@@ -265,7 +229,7 @@ def restore_file(self, source: IO, target_name: str) -> None:
             pass
         copy(source, target)
 
-    @locked(target=".purge", mode="shared")
+    @Backup.locked(target=".purge", mode="shared")
     def restore_stdout(self, source: IO) -> None:
         """Emit restore data to stdout (for pipe processing)."""
         self.log.debug("restore-stdout", source=source.name)
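With this patch the HTTP client lives next to the server in backy/daemon/api.py. A minimal sketch of talking to that API from outside — hypothetical URL and token values, mirroring `Client.fetch_status` above:

    import aiohttp
    from aiohttp import ClientTimeout, hdrs

    async def fetch_status(url: str, token: str, taskid: str) -> list:
        # Bearer auth plus a taskid header, errors surfaced via
        # raise_for_status, same session setup as Client.__init__.
        async with aiohttp.ClientSession(
            url,
            headers={hdrs.AUTHORIZATION: "Bearer " + token, "taskid": taskid},
            raise_for_status=True,
            timeout=ClientTimeout(30, connect=10),
        ) as session:
            async with session.get("/v1/status", params={"filter": ""}) as resp:
                return await resp.json()

    # e.g. asyncio.run(fetch_status("http://backy-server:6023", "secret", "t0"))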
.../sources/ceph/tests/test_ceph_source.py | 37 +-- src/backy/rbd/sources/ceph/tests/test_diff.py | 2 +- src/backy/rbd/sources/ceph/tests/test_rbd.py | 6 +- src/backy/rbd/sources/file.py | 25 +- src/backy/rbd/sources/flyingcircus/source.py | 8 +- .../sources/flyingcircus/tests/test_source.py | 4 +- src/backy/rbd/tests/conftest.py | 24 ++ .../backy/rbd/tests/smoketest.sh | 0 src/backy/rbd/tests/test_backup.py | 66 ++++ .../tests/test_backy-rbd.py} | 39 +-- src/backy/rbd/tests/test_main.py | 213 +++++++++++++ src/backy/{ => rbd}/tests/test_quarantine.py | 2 +- src/backy/{ => rbd}/tests/test_source.py | 6 +- src/backy/revision.py | 6 +- src/backy/tests/samples/sample1.rev | 5 - src/backy/tests/samples/sample2.rev | 4 - src/backy/tests/samples/simple_file/config | 8 - src/backy/tests/test_archive.py | 194 ------------ src/backy/tests/test_backup.py | 283 ++++++++++++------ src/backy/tests/test_fallocate.py | 43 --- src/backy/tests/test_timeout.py | 22 -- src/backy/tests/test_utils.py | 63 ++++ 47 files changed, 704 insertions(+), 767 deletions(-) create mode 100644 src/backy/cli/tests/__init__.py rename src/backy/{ => cli}/tests/test_main.py (100%) create mode 100644 src/backy/daemon/tests/__init__.py rename src/backy/{ => daemon}/tests/test_api.py (100%) rename src/backy/{ => daemon}/tests/test_client.py (100%) rename src/backy/{ => daemon}/tests/test_daemon.py (100%) rename src/backy/{ => daemon}/tests/test_scheduler.py (100%) delete mode 100644 src/backy/rbd/backends/__init__.py rename src/backy/rbd/{backends => }/chunked/__init__.py (94%) rename src/backy/rbd/{backends => }/chunked/chunk.py (98%) rename src/backy/rbd/{backends => }/chunked/file.py (99%) rename src/backy/rbd/{backends => }/chunked/store.py (98%) create mode 100644 src/backy/rbd/chunked/tests/__init__.py rename src/backy/rbd/{backends => }/chunked/tests/test_backend.py (58%) rename src/backy/rbd/{backends => }/chunked/tests/test_chunk.py (95%) rename src/backy/rbd/{backends => }/chunked/tests/test_file.py (97%) delete mode 100644 src/backy/rbd/revision.py create mode 100644 src/backy/rbd/tests/conftest.py rename smoketest.sh => src/backy/rbd/tests/smoketest.sh (100%) create mode 100644 src/backy/rbd/tests/test_backup.py rename src/backy/{tests/test_backy.py => rbd/tests/test_backy-rbd.py} (89%) create mode 100644 src/backy/rbd/tests/test_main.py rename src/backy/{ => rbd}/tests/test_quarantine.py (94%) rename src/backy/{ => rbd}/tests/test_source.py (78%) delete mode 100644 src/backy/tests/samples/sample1.rev delete mode 100644 src/backy/tests/samples/sample2.rev delete mode 100644 src/backy/tests/samples/simple_file/config delete mode 100644 src/backy/tests/test_archive.py delete mode 100644 src/backy/tests/test_fallocate.py delete mode 100644 src/backy/tests/test_timeout.py diff --git a/src/backy/cli/tests/__init__.py b/src/backy/cli/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/tests/test_main.py b/src/backy/cli/tests/test_main.py similarity index 100% rename from src/backy/tests/test_main.py rename to src/backy/cli/tests/test_main.py diff --git a/src/backy/conftest.py b/src/backy/conftest.py index 980c9448..916916c9 100644 --- a/src/backy/conftest.py +++ b/src/backy/conftest.py @@ -1,8 +1,6 @@ import datetime -import json import os import random -import shutil from unittest import mock from zoneinfo import ZoneInfo @@ -11,12 +9,9 @@ import backy.backup import backy.logging -import backy.main import backy.schedule from backy import utils -fixtures = os.path.dirname(__file__) + 
"/tests/samples" - @pytest.fixture(autouse=True, scope="session") def fix_pytest_coverage_465(): @@ -26,14 +21,6 @@ def fix_pytest_coverage_465(): ) -@pytest.fixture -def simple_file_config(tmp_path, monkeypatch, log): - shutil.copy(fixtures + "/simple_file/config", str(tmp_path)) - monkeypatch.chdir(tmp_path) - b = backy.backup.Backup(tmp_path, log) - return b - - def pytest_assertrepr_compare(op, left, right): if left.__class__.__name__ != "Ellipsis": return @@ -84,23 +71,6 @@ def schedule(): return schedule -@pytest.fixture(params=["chunked", "cowfile"]) -def backup(request, schedule, tmp_path, log): - with open(str(tmp_path / "config"), "w", encoding="utf-8") as f: - json.dump( - { - "source": { - "type": "file", - "filename": "test", - "backend": request.param, - }, - "schedule": schedule.to_dict(), - }, - f, - ) - return backy.backup.Backup(tmp_path, log) - - @pytest.fixture(scope="session") def setup_structlog(): utils.log_data = "" diff --git a/src/backy/daemon/tests/__init__.py b/src/backy/daemon/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/tests/test_api.py b/src/backy/daemon/tests/test_api.py similarity index 100% rename from src/backy/tests/test_api.py rename to src/backy/daemon/tests/test_api.py diff --git a/src/backy/tests/test_client.py b/src/backy/daemon/tests/test_client.py similarity index 100% rename from src/backy/tests/test_client.py rename to src/backy/daemon/tests/test_client.py diff --git a/src/backy/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py similarity index 100% rename from src/backy/tests/test_daemon.py rename to src/backy/daemon/tests/test_daemon.py diff --git a/src/backy/tests/test_scheduler.py b/src/backy/daemon/tests/test_scheduler.py similarity index 100% rename from src/backy/tests/test_scheduler.py rename to src/backy/daemon/tests/test_scheduler.py diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index 7aa2df5c..2757fcd2 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -79,7 +79,7 @@ def main(): parser.print_usage() sys.exit(0) - backupdir = Path() # TODO + backupdir = Path("/srv/backy/" + args.job) # TODO # Logging logging.init_logging( diff --git a/src/backy/rbd/backends/__init__.py b/src/backy/rbd/backends/__init__.py deleted file mode 100644 index b1b7c80c..00000000 --- a/src/backy/rbd/backends/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -from abc import ABC, abstractmethod -from typing import IO, TYPE_CHECKING, Optional, Type - -from structlog.stdlib import BoundLogger - -if TYPE_CHECKING: - from backy.revision import Revision - - -class BackendException(IOError): - pass - - -class BackyBackend(ABC): - @abstractmethod - def __init__(self, revision: "Revision", log: BoundLogger) -> None: - ... - - @abstractmethod - def open(self, mode: str = "rb", parent: Optional["Revision"] = None) -> IO: - ... 
- - def purge(self) -> None: - pass - - def verify(self) -> None: - pass - - -def select_backend(type_: str) -> Type[BackyBackend]: - match type_: - case "chunked": - from backy.backends.chunked import ChunkedFileBackend - - return ChunkedFileBackend - case _: - raise ValueError(f"Invalid backend '{type_}'") diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/backup.py index c7b63276..851da92a 100644 --- a/src/backy/rbd/backup.py +++ b/src/backy/rbd/backup.py @@ -4,21 +4,20 @@ import time from enum import Enum from pathlib import Path -from typing import IO, Literal +from typing import IO -import yaml from structlog.stdlib import BoundLogger -import backy.backup.backends.chunked +import backy from ..backup import Backup -from .backends import BackendException, BackyBackend, select_backend -from .ext_deps import BACKY_EXTRACT +from ..ext_deps import BACKY_EXTRACT +from ..revision import Revision, Trust +from ..utils import CHUNK_SIZE, copy, posix_fadvise +from .chunked import ChunkedFileBackend +from .chunked.chunk import BackendException from .quarantine import QuarantineStore -from .revision import Revision, Trust, filter_schedule_tags -from .schedule import Schedule from .sources import BackySourceFactory, select_source -from .utils import CHUNK_SIZE, copy, posix_fadvise # Locking strategy: # @@ -89,6 +88,9 @@ def problem_reports(self): @Backup.locked(target=".backup", mode="exclusive") @Backup.locked(target=".purge", mode="shared") def backup(self, revision: str) -> bool: + new_revision = self.find_by_uuid(revision) + self.prevent_remote_rev([new_revision]) + self.path.joinpath("last").unlink(missing_ok=True) self.path.joinpath("last.rev").unlink(missing_ok=True) @@ -99,8 +101,7 @@ def backup(self, revision: str) -> bool: "Source is not ready (does it exist? 
can you access it?)" ) - new_revision = self.find_by_uuid(revision) - backend = new_revision.backend + backend = ChunkedFileBackend(new_revision, self.log) with self.source(new_revision) as source: try: source.backup(backend) @@ -134,20 +135,19 @@ def backup(self, revision: str) -> bool: for revision in reversed(self.get_history(clean=True, local=True)): if revision.trust == Trust.DISTRUSTED: self.log.warning("inconsistent") - revision.backend.verify() + backend.verify() break return verified @Backup.locked(target=".purge", mode="shared") def verify(self, revision: str) -> None: - revs = self.find_revisions(revision) - self.prevent_remote_rev(revs) - for r in revs: - r.backend.verify() + rev = self.find_by_uuid(revision) + self.prevent_remote_rev([rev]) + ChunkedFileBackend(rev, self.log).verify() @Backup.locked(target=".purge", mode="exclusive") def gc(self) -> None: - self.local_history[-1].backend.purge() + ChunkedFileBackend(self.local_history[-1], self.log).purge() self.clear_purge_pending() ################# @@ -161,8 +161,8 @@ def restore( target: str, restore_backend: RestoreBackend = RestoreBackend.AUTO, ) -> None: - r = self.find(revision) - s = r.backend.open("rb") + r = self.find_by_uuid(revision) + s = ChunkedFileBackend(r, self.log).open("rb") if restore_backend == RestoreBackend.AUTO: if self.backy_extract_supported(s): restore_backend = RestoreBackend.RUST @@ -178,11 +178,8 @@ def restore( elif restore_backend == RestoreBackend.RUST: self.restore_backy_extract(r, target) - def backy_extract_supported(self, file: IO) -> bool: + def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: log = self.log.bind(subsystem="backy-extract") - if not isinstance(file, backy.backends.chunked.File): - log.debug("unsupported-backend") - return False if file.size % CHUNK_SIZE != 0: log.debug("not-chunk-aligned") return False diff --git a/src/backy/rbd/backends/chunked/__init__.py b/src/backy/rbd/chunked/__init__.py similarity index 94% rename from src/backy/rbd/backends/chunked/__init__.py rename to src/backy/rbd/chunked/__init__.py index 0fbf74c5..401c33f7 100644 --- a/src/backy/rbd/backends/chunked/__init__.py +++ b/src/backy/rbd/chunked/__init__.py @@ -6,18 +6,16 @@ from backy.revision import Revision, Trust from backy.utils import END, report_status -from .. 
import BackyBackend from .chunk import Chunk, Hash from .file import File from .store import Store -class ChunkedFileBackend(BackyBackend): +class ChunkedFileBackend: # multiple Backends may share the same store STORES: dict[Path, Store] = dict() def __init__(self, revision: Revision, log: BoundLogger): - assert revision.backend_type == "chunked" self.backup = revision.backup self.revision = revision path = self.backup.path / "chunks" @@ -66,10 +64,7 @@ def verify(self): # Load verified chunks to avoid duplicate work for revision in self.backup.get_history(clean=True, local=True): - if ( - revision.trust != Trust.VERIFIED - or revision.backend_type != "chunked" - ): + if revision.trust != Trust.VERIFIED: continue verified_chunks.update( type(self)(revision, self.log).open()._mapping.values() diff --git a/src/backy/rbd/backends/chunked/chunk.py b/src/backy/rbd/chunked/chunk.py similarity index 98% rename from src/backy/rbd/backends/chunked/chunk.py rename to src/backy/rbd/chunked/chunk.py index 7939a336..950120cf 100644 --- a/src/backy/rbd/backends/chunked/chunk.py +++ b/src/backy/rbd/chunked/chunk.py @@ -7,8 +7,7 @@ import lzo import mmh3 -import backy.backends.chunked -from backy.backends import BackendException +import backy.rbd.chunked from backy.utils import posix_fadvise Hash: TypeAlias = str @@ -19,6 +18,10 @@ } +class BackendException(IOError): + pass + + class InconsistentHash(BackendException): def __init__(self, expected, actual): self.expected = expected diff --git a/src/backy/rbd/backends/chunked/file.py b/src/backy/rbd/chunked/file.py similarity index 99% rename from src/backy/rbd/backends/chunked/file.py rename to src/backy/rbd/chunked/file.py index 3cb50d76..48a5e168 100644 --- a/src/backy/rbd/backends/chunked/file.py +++ b/src/backy/rbd/chunked/file.py @@ -6,7 +6,7 @@ from collections import defaultdict from typing import Optional, Tuple -import backy.backends.chunked +import backy.rbd.chunked from .chunk import Chunk, Hash diff --git a/src/backy/rbd/backends/chunked/store.py b/src/backy/rbd/chunked/store.py similarity index 98% rename from src/backy/rbd/backends/chunked/store.py rename to src/backy/rbd/chunked/store.py index d60151a0..50c94146 100644 --- a/src/backy/rbd/backends/chunked/store.py +++ b/src/backy/rbd/chunked/store.py @@ -3,7 +3,7 @@ from structlog.stdlib import BoundLogger -from backy.backends.chunked.chunk import Hash +from backy.rbd.chunked.chunk import Hash # A chunkstore, is responsible for all revisions for a single backup, for now. 
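
The `STORES` class attribute above caches one `Store` per chunks directory so that every `ChunkedFileBackend` of the same backup shares de-duplication state. A sketch of that caching pattern; it assumes `Store(path, log)` is the constructor signature, which is not shown in this hunk:

    from pathlib import Path

    from structlog.stdlib import BoundLogger

    from backy.rbd.chunked.store import Store

    _stores: dict[Path, Store] = {}

    def store_for(path: Path, log: BoundLogger) -> Store:
        # One Store per chunks/ directory; create lazily on first use,
        # then share it (illustrative helper, not project API).
        if path not in _stores:
            _stores[path] = Store(path, log)
        return _stores[path]
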
# We can start having statistics later how much reuse between images is diff --git a/src/backy/rbd/chunked/tests/__init__.py b/src/backy/rbd/chunked/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/rbd/backends/chunked/tests/test_backend.py b/src/backy/rbd/chunked/tests/test_backend.py similarity index 58% rename from src/backy/rbd/backends/chunked/tests/test_backend.py rename to src/backy/rbd/chunked/tests/test_backend.py index c93c2362..bd6b3ceb 100644 --- a/src/backy/rbd/backends/chunked/tests/test_backend.py +++ b/src/backy/rbd/chunked/tests/test_backend.py @@ -1,22 +1,18 @@ -import os - -import pytest - -from backy.backends.chunked import ChunkedFileBackend +from backy.rbd.chunked import ChunkedFileBackend from backy.revision import Revision -def test_overlay(simple_file_config, log): - r = Revision.create(simple_file_config, set(), log) - assert isinstance(r.backend, ChunkedFileBackend) +def test_overlay(rbdbackup, log): + r = Revision.create(rbdbackup, set(), log) + backend = ChunkedFileBackend(r, log) # Write 1 version to the file - f = r.backend.open("w") + f = backend.open("w") f.write(b"asdf") f.close() - with r.backend.open("r") as f: + with backend.open("r") as f: assert f.read() == b"asdf" # Open the file in overlay, write to it - f = r.backend.open("o") + f = backend.open("o") assert f.read() == b"asdf" f.seek(0) f.write(b"bsdf") @@ -24,25 +20,25 @@ def test_overlay(simple_file_config, log): assert f.read() == b"bsdf" f.close() # Close the file and open it again results in the original content - f = r.backend.open("r") + f = backend.open("r") assert f.read() == b"asdf" f.close() -def test_purge(simple_file_config, log): - b = simple_file_config - r = Revision.create(b, set(), log) +def test_purge(rbdbackup, log): + r = Revision.create(rbdbackup, set(), log) + backend = ChunkedFileBackend(r, log) # Write 1 version to the file - f = r.backend.open("w") + f = backend.open("w") f.write(b"asdf") f.close() r.materialize() - remote = Revision(b, log) # remote revision without local data + remote = Revision(rbdbackup, log) # remote revision without local data remote.server = "remote" remote.materialize() - b.scan() + rbdbackup.scan() # Reassign as the scan will create a new reference - r = b.history[0] + r = rbdbackup.history[0] assert len(list(r.backend.store.ls())) == 1 r.backend.purge() assert len(list(r.backend.store.ls())) == 1 diff --git a/src/backy/rbd/backends/chunked/tests/test_chunk.py b/src/backy/rbd/chunked/tests/test_chunk.py similarity index 95% rename from src/backy/rbd/backends/chunked/tests/test_chunk.py rename to src/backy/rbd/chunked/tests/test_chunk.py index 0dc84868..b8a2578c 100644 --- a/src/backy/rbd/backends/chunked/tests/test_chunk.py +++ b/src/backy/rbd/chunked/tests/test_chunk.py @@ -3,9 +3,9 @@ import lzo import pytest -from backy.backends.chunked.chunk import Chunk, InconsistentHash, hash -from backy.backends.chunked.file import File -from backy.backends.chunked.store import Store +from backy.rbd.chunked.chunk import Chunk, InconsistentHash, hash +from backy.rbd.chunked.file import File +from backy.rbd.chunked.store import Store SPACE_CHUNK = b" " * Chunk.CHUNK_SIZE SPACE_CHUNK_HASH = "c01b5d75bfe6a1fa5bca6e492c5ab09a" diff --git a/src/backy/rbd/backends/chunked/tests/test_file.py b/src/backy/rbd/chunked/tests/test_file.py similarity index 97% rename from src/backy/rbd/backends/chunked/tests/test_file.py rename to src/backy/rbd/chunked/tests/test_file.py index adcd128e..69244bc6 100644 --- 
a/src/backy/rbd/backends/chunked/tests/test_file.py +++ b/src/backy/rbd/chunked/tests/test_file.py @@ -4,9 +4,9 @@ import lzo import pytest -from backy.backends.chunked.chunk import Chunk, InconsistentHash -from backy.backends.chunked.file import File -from backy.backends.chunked.store import Store +from backy.rbd.chunked.chunk import Chunk, InconsistentHash +from backy.rbd.chunked.file import File +from backy.rbd.chunked.store import Store def test_simple_open_write_read_seek(tmp_path, log): diff --git a/src/backy/rbd/revision.py b/src/backy/rbd/revision.py deleted file mode 100644 index efd46d16..00000000 --- a/src/backy/rbd/revision.py +++ /dev/null @@ -1,187 +0,0 @@ -import datetime -from enum import Enum -from pathlib import Path -from typing import IO, TYPE_CHECKING, Literal, Optional - -import shortuuid -import yaml -from structlog.stdlib import BoundLogger - -from ..revision import Revision -from . import utils -from .backends import select_backend -from .utils import SafeFile - -if TYPE_CHECKING: - from .backends import BackyBackend - from .backup import Backup - - -def filter_schedule_tags(tags): - return {t for t in tags if not t.startswith(TAG_MANUAL_PREFIX)} - - -def filter_manual_tags(tags): - return {t for t in tags if t.startswith(TAG_MANUAL_PREFIX)} - - -class RbdRevision(Revision): - backup: "Backup" - uuid: str - timestamp: datetime.datetime - stats: dict - tags: set[str] - orig_tags: set[str] - trust: Trust = Trust.TRUSTED - backend_type: Literal["cowfile", "chunked"] = "chunked" - server: str = "" - log: BoundLogger - - def __init__( - self, - backup: "Backup", - log: BoundLogger, - uuid: Optional[str] = None, - timestamp: Optional[datetime.datetime] = None, - ) -> None: - self.backup = backup - self.uuid = uuid if uuid else shortuuid.uuid() - self.timestamp = timestamp if timestamp else utils.now() - self.stats = {"bytes_written": 0} - self.tags = set() - self.orig_tags = set() - self.log = log.bind(revision_uuid=self.uuid, subsystem="revision") - - @classmethod - def create( - cls, - backup: "Backup", - tags: set[str], - log: BoundLogger, - *, - uuid: Optional[str] = None, - ) -> "Revision": - r = Revision(backup, log, uuid) - r.tags = tags - r.backend_type = backup.default_backend_type - return r - - @property - def backend(self) -> "BackyBackend": - return select_backend(self.backend_type)(self, self.log) - - @classmethod - def load(cls, file: Path, backup: "Backup", log: BoundLogger) -> "Revision": - with file.open(encoding="utf-8") as f: - metadata = yaml.safe_load(f) - r = cls.from_dict(metadata, backup, log) - return r - - @classmethod - def from_dict(cls, metadata, backup, log): - ts = metadata["timestamp"] - if isinstance(ts, str): - ts = datetime.datetime.fromisoformat(ts) - assert ts.tzinfo == datetime.timezone.utc - r = Revision(backup, log, uuid=metadata["uuid"], timestamp=ts) - r.stats = metadata.get("stats", {}) - r.tags = set(metadata.get("tags", [])) - r.orig_tags = set(metadata.get("orig_tags", [])) - r.server = metadata.get("server", "") - # Assume trusted by default to support migration - r.trust = Trust(metadata.get("trust", Trust.TRUSTED.value)) - # If the metadata does not show the backend type, then it's cowfile. 
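
The `from_dict` being deleted here documents the on-disk `.rev` schema: plain YAML with a timezone-aware UTC timestamp. A sketch of reading one by hand with the same parsing rules (the file name is illustrative):

    import datetime

    import yaml

    with open("123-0.rev", encoding="utf-8") as f:
        metadata = yaml.safe_load(f)
    ts = metadata["timestamp"]
    if isinstance(ts, str):  # older files may store the timestamp as a string
        ts = datetime.datetime.fromisoformat(ts)
    assert ts.tzinfo == datetime.timezone.utc
    tags = set(metadata.get("tags", []))
    trust = metadata.get("trust", "trusted")  # trusted by default, for migration
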
- r.backend_type = metadata.get("backend_type", "cowfile") - return r - - @property - def filename(self) -> Path: - """Full pathname of the image file.""" - return self.backup.path / self.uuid - - @property - def info_filename(self) -> Path: - """Full pathname of the metadata file.""" - return self.filename.with_suffix(self.filename.suffix + ".rev") - - def materialize(self) -> None: - self.write_info() - self.writable() - - def write_info(self) -> None: - self.log.debug("writing-info", tags=", ".join(self.tags)) - with SafeFile(self.info_filename, encoding="utf-8") as f: - f.open_new("wb") - f.write("# Please use the `backy tags` subcommand to edit tags\n") - yaml.safe_dump(self.to_dict(), f) - - def to_dict(self) -> dict: - return { - "uuid": self.uuid, - "backend_type": self.backend_type, - "timestamp": self.timestamp, - "parent": getattr( - self.get_parent(), "uuid", "" - ), # compatibility with older versions - "stats": self.stats, - "trust": self.trust.value, - "tags": list(self.tags), - "orig_tags": list(self.orig_tags), - "server": self.server, - } - - @property - def pending_changes(self): - return self.server and self.tags != self.orig_tags - - def distrust(self) -> None: - assert not self.server - self.log.info("distrusted") - self.trust = Trust.DISTRUSTED - - def verify(self) -> None: - assert not self.server - self.log.info("verified") - self.trust = Trust.VERIFIED - - def remove(self, force=False) -> None: - self.log.info("remove") - if not force and self.server: - self.log.debug("remove-remote", server=self.server) - self.tags = set() - self.write_info() - else: - for filename in self.filename.parent.glob(self.filename.name + "*"): - if filename.exists(): - self.log.debug("remove-start", filename=filename) - filename.unlink() - self.log.debug("remove-end", filename=filename) - - if self in self.backup.history: - self.backup.history.remove(self) - del self.backup._by_uuid[self.uuid] - - def writable(self) -> None: - if self.filename.exists(): - self.filename.chmod(0o640) - self.info_filename.chmod(0o640) - - def readonly(self) -> None: - if self.filename.exists(): - self.filename.chmod(0o440) - self.info_filename.chmod(0o440) - - def get_parent(self, ignore_trust=False) -> Optional["Revision"]: - """defaults to last rev if not in history""" - prev = None - for r in self.backup.history: - if r.backend_type != self.backend_type: - continue - if not ignore_trust and r.trust == Trust.DISTRUSTED: - continue - if r.server != self.server: - continue - if r.uuid == self.uuid: - break - prev = r - return prev diff --git a/src/backy/rbd/sources/__init__.py b/src/backy/rbd/sources/__init__.py index 23e63a3d..4bedb3c1 100644 --- a/src/backy/rbd/sources/__init__.py +++ b/src/backy/rbd/sources/__init__.py @@ -1,5 +1,4 @@ from abc import ABC, abstractmethod -from importlib.metadata import entry_points from typing import Type from structlog.stdlib import BoundLogger @@ -46,15 +45,17 @@ def ready(self) -> bool: def select_source(type_: str) -> Type[BackySourceFactory]: match type_: case "flyingcircus": - from backy.sources.flyingcircus.source import FlyingCircusRootDisk + from backy.rbd.sources.flyingcircus.source import ( + FlyingCircusRootDisk, + ) return FlyingCircusRootDisk case "ceph-rbd": - from backy.sources.ceph.source import CephRBD + from backy.rbd.sources.ceph.source import CephRBD return CephRBD case "file": - from backy.sources.file import File + from backy.rbd.sources.file import File return File case _: diff --git a/src/backy/rbd/sources/ceph/diff.py 
b/src/backy/rbd/sources/ceph/diff.py index b7c712ac..487b9e18 100644 --- a/src/backy/rbd/sources/ceph/diff.py +++ b/src/backy/rbd/sources/ceph/diff.py @@ -2,7 +2,7 @@ from collections import namedtuple from typing import IO, Optional -from backy.fallocate import punch_hole +from backy.utils import punch_hole def unpack_from(fmt, f): diff --git a/src/backy/rbd/sources/ceph/rbd.py b/src/backy/rbd/sources/ceph/rbd.py index fe0e7114..b1eeca3c 100644 --- a/src/backy/rbd/sources/ceph/rbd.py +++ b/src/backy/rbd/sources/ceph/rbd.py @@ -5,10 +5,10 @@ from structlog.stdlib import BoundLogger -import backy.sources.ceph +import backy.rbd.sources.ceph +from backy.ext_deps import RBD +from backy.utils import CHUNK_SIZE -from ...ext_deps import RBD -from ...utils import CHUNK_SIZE from .diff import RBDDiffV1 @@ -115,7 +115,7 @@ def snap_rm(self, image): @contextlib.contextmanager def export_diff(self, new: str, old: str) -> Iterator[RBDDiffV1]: self.log.info("export-diff") - if backy.sources.ceph.CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF: + if backy.rbd.sources.ceph.CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF: EXPORT_WHOLE_OBJECT = ["--whole-object"] else: EXPORT_WHOLE_OBJECT = [] diff --git a/src/backy/rbd/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py index 8d6623fc..6c3c5846 100644 --- a/src/backy/rbd/sources/ceph/source.py +++ b/src/backy/rbd/sources/ceph/source.py @@ -2,11 +2,11 @@ from structlog.stdlib import BoundLogger -import backy.backends import backy.utils -from backy.revision import Revision, Trust +from backy.revision import Revision -from ...backends import BackyBackend +from ... import RbdBackup +from ...chunked import ChunkedFileBackend from ...quarantine import QuarantineReport from .. import BackySource, BackySourceContext, BackySourceFactory from .rbd import RBDClient @@ -24,13 +24,16 @@ class CephRBD(BackySource, BackySourceFactory, BackySourceContext): always_full: bool log: BoundLogger rbd: RBDClient + revision: Revision + backup: RbdBackup - def __init__(self, config: dict, log: BoundLogger): + def __init__(self, config: dict, backup: RbdBackup, log: BoundLogger): self.pool = config["pool"] self.image = config["image"] self.always_full = config.get("full-always", False) self.log = log.bind(subsystem="ceph") self.rbd = RBDClient(self.log) + self.backup = backup def ready(self) -> bool: """Check whether the source can be backed up. 
@@ -67,7 +70,7 @@ def _image_name(self) -> str: def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): self._delete_old_snapshots() - def backup(self, target: BackyBackend) -> None: + def backup(self, target: ChunkedFileBackend) -> None: if self.always_full: self.log.info("backup-always-full") self.full(target) @@ -90,7 +93,7 @@ def backup(self, target: BackyBackend) -> None: break self.diff(target, parent) - def diff(self, target: BackyBackend, parent: Revision) -> None: + def diff(self, target: ChunkedFileBackend, parent: Revision) -> None: self.log.info("diff") snap_from = "backy-" + parent.uuid snap_to = "backy-" + self.revision.uuid @@ -102,11 +105,11 @@ def diff(self, target: BackyBackend, parent: Revision) -> None: self.revision.stats["bytes_written"] = bytes # TMP Gather statistics to see where to optimize - from backy.backends.chunked.chunk import chunk_stats + from backy.rbd.chunked.chunk import chunk_stats self.revision.stats["chunk_stats"] = chunk_stats - def full(self, target: BackyBackend) -> None: + def full(self, target: ChunkedFileBackend) -> None: self.log.info("full") s = self.rbd.export( "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) @@ -122,11 +125,11 @@ def full(self, target: BackyBackend) -> None: self.revision.stats["bytes_written"] = copied # TMP Gather statistics to see if we actually are aligned. - from backy.backends.chunked.chunk import chunk_stats + from backy.rbd.chunked.chunk import chunk_stats self.revision.stats["chunk_stats"] = chunk_stats - def verify(self, target: BackyBackend) -> bool: + def verify(self, target: ChunkedFileBackend) -> bool: s = self.rbd.image_reader( "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) ) @@ -137,7 +140,7 @@ def verify(self, target: BackyBackend) -> bool: return backy.utils.files_are_roughly_equal( source, target_, - report=lambda s, t, o: self.revision.backup.quarantine.add_report( + report=lambda s, t, o: self.backup.quarantine.add_report( QuarantineReport(s, t, o) ), ) diff --git a/src/backy/rbd/sources/ceph/tests/conftest.py b/src/backy/rbd/sources/ceph/tests/conftest.py index 5694e1d8..6c4adf29 100644 --- a/src/backy/rbd/sources/ceph/tests/conftest.py +++ b/src/backy/rbd/sources/ceph/tests/conftest.py @@ -4,8 +4,8 @@ import pytest -import backy.sources.ceph -from backy.sources.ceph.rbd import RBDClient +import backy.rbd.sources.ceph +from backy.rbd.sources.ceph.rbd import RBDClient class CephCLIBase: diff --git a/src/backy/rbd/sources/ceph/tests/test_ceph_source.py b/src/backy/rbd/sources/ceph/tests/test_ceph_source.py index 71c34923..36f988fa 100644 --- a/src/backy/rbd/sources/ceph/tests/test_ceph_source.py +++ b/src/backy/rbd/sources/ceph/tests/test_ceph_source.py @@ -8,11 +8,10 @@ import pytest import backy.utils -from backy.backends.chunked import ChunkedFileBackend -from backy.backends.cowfile import COWFileBackend +from backy.rbd.chunked import ChunkedFileBackend +from backy.rbd.sources import select_source +from backy.rbd.sources.ceph.source import CephRBD from backy.revision import Revision -from backy.sources import select_source -from backy.sources.ceph.source import CephRBD BLOCK = backy.utils.PUNCH_SIZE @@ -113,7 +112,7 @@ def test_choose_full_without_parent(ceph_rbd_imagesource, backup, log): revision = Revision.create(backup, set(), log) with source(revision) as s: - s.backup(revision.backend) + s.backup(ChunkedFileBackend(revision, log)) assert not source.diff.called assert source.full.called @@ -136,7 +135,7 @@ def 
test_choose_full_without_snapshot(ceph_rbd_imagesource, backup, log): revision2 = Revision.create(backup, set(), log) with source(revision2): - source.backup(revision2.backend) + source.backup(ChunkedFileBackend(revision2, log)) assert not source.diff.called assert source.full.called @@ -162,7 +161,7 @@ def test_choose_diff_with_snapshot(ceph_rbd_imagesource, backup, log): revision2 = Revision.create(backup, set(), log) with source(revision2): - source.backup(revision2.backend) + source.backup(ChunkedFileBackend(revision2, log)) assert source.diff.called assert not source.full.called @@ -172,7 +171,7 @@ def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): """When doing a diff backup between two revisions with snapshot, the RBDDiff needs to be called properly, a snapshot for the new revision needs to be created and the snapshot of the previous revision needs to be removed after the successfull backup.""" - from backy.sources.ceph.diff import RBDDiffV1 + from backy.rbd.sources.ceph.diff import RBDDiffV1 source = ceph_rbd_imagesource @@ -188,7 +187,7 @@ def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): ) revision.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - with parent.backend.open("wb") as f: + with ChunkedFileBackend(parent, log).open("wb") as f: f.write(b"asdf") backup.scan() @@ -206,7 +205,9 @@ def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): io.BytesIO(SAMPLE_RBDDIFF) ) with source(revision): - source.diff(revision.backend, revision.get_parent()) + source.diff( + ChunkedFileBackend(revision, log), revision.get_parent() + ) backup.history.append(revision) export.assert_called_with( "test/foo@backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802", @@ -231,7 +232,7 @@ def test_full_backup(ceph_rbd_imagesource, backup, tmp_path, log): with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han likes Leia.") - backend = revision.backend + backend = ChunkedFileBackend(revision, log) with source(revision): source.full(backend) export.assert_called_with("test/foo@backy-a0") @@ -250,7 +251,7 @@ def test_full_backup(ceph_rbd_imagesource, backup, tmp_path, log): with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han loves Leia.") - backend = revision2.backend + backend = ChunkedFileBackend(revision2, log) with source(revision2): source.full(backend) @@ -283,13 +284,14 @@ def test_full_backup_integrates_changes( # check fidelity for content, rev in [(content0, rev0), (content1, rev1)]: + backend = ChunkedFileBackend(rev, log) with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(content) with source(rev): - source.full(rev.backend) + source.full(backend) export.assert_called_with("test/foo@backy-{}".format(rev.uuid)) - with rev.backend.open("rb") as f: + with backend.open("rb") as f: assert content == f.read() @@ -307,7 +309,7 @@ def test_verify_fail(backup, tmp_path, ceph_rbd_imagesource, log): with open(rbd_source, "w") as f: f.write("Han likes Leia.") - backend = revision.backend + backend = ChunkedFileBackend(revision, log) with backend.open("wb") as f: f.write(b"foobar") # The backend has false data, so this needs to be detected. 
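
`CephRBD.verify` as exercised above boils down to: read the snapshot and the chunked backend side by side and file a `QuarantineReport` for any offset that differs. A simplified sketch of that comparison (unlike `files_are_roughly_equal` it checks every chunk rather than sampling):

    from backy.rbd.quarantine import QuarantineReport, QuarantineStore
    from backy.utils import CHUNK_SIZE

    def verify_streams(source, target, quarantine: QuarantineStore) -> bool:
        # Compare both streams chunk by chunk; quarantine the first
        # mismatch and report failure.
        offset = 0
        while True:
            a = source.read(CHUNK_SIZE)
            b = target.read(CHUNK_SIZE)
            if a != b:
                quarantine.add_report(QuarantineReport(a, b, offset))
                return False
            if not a:
                return True
            offset += len(a)
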
@@ -331,9 +333,10 @@ def test_verify(ceph_rbd_imagesource, backup, tmp_path, log): f.write(b"Han likes Leia.") source.rbd.unmap(rbd_source) - with revision.backend.open("wb") as f: + backend = ChunkedFileBackend(revision, log) + with backend.open("wb") as f: f.write(b"Han likes Leia.") f.flush() with source(revision): - assert source.verify(revision.backend) + assert source.verify(backend) diff --git a/src/backy/rbd/sources/ceph/tests/test_diff.py b/src/backy/rbd/sources/ceph/tests/test_diff.py index fc7c24c0..a926a24c 100644 --- a/src/backy/rbd/sources/ceph/tests/test_diff.py +++ b/src/backy/rbd/sources/ceph/tests/test_diff.py @@ -5,7 +5,7 @@ import pytest -from backy.sources.ceph.diff import ( +from backy.rbd.sources.ceph.diff import ( Data, FromSnap, RBDDiffV1, diff --git a/src/backy/rbd/sources/ceph/tests/test_rbd.py b/src/backy/rbd/sources/ceph/tests/test_rbd.py index 1b0033b3..b4a30c5f 100644 --- a/src/backy/rbd/sources/ceph/tests/test_rbd.py +++ b/src/backy/rbd/sources/ceph/tests/test_rbd.py @@ -4,10 +4,10 @@ import pytest -import backy.sources.ceph +import backy.rbd.sources.ceph from backy.ext_deps import RBD -from backy.sources.ceph.diff import RBDDiffV1 -from backy.sources.ceph.rbd import RBDClient +from backy.rbd.sources.ceph.diff import RBDDiffV1 +from backy.rbd.sources.ceph.rbd import RBDClient @mock.patch("subprocess.check_output") diff --git a/src/backy/rbd/sources/file.py b/src/backy/rbd/sources/file.py index 2312943b..142f766a 100644 --- a/src/backy/rbd/sources/file.py +++ b/src/backy/rbd/sources/file.py @@ -1,11 +1,14 @@ -from typing import Optional - from structlog.stdlib import BoundLogger -import backy.backends -from backy.quarantine import QuarantineReport -from backy.revision import Revision, Trust -from backy.sources import BackySource, BackySourceContext, BackySourceFactory +import backy.rbd.chunked +from backy.rbd import RbdBackup +from backy.rbd.quarantine import QuarantineReport +from backy.rbd.sources import ( + BackySource, + BackySourceContext, + BackySourceFactory, +) +from backy.revision import Revision from backy.utils import copy, copy_overwrite, files_are_equal @@ -13,9 +16,11 @@ class File(BackySource, BackySourceFactory, BackySourceContext): filename: str cow: bool revision: Revision + backup: RbdBackup log: BoundLogger - def __init__(self, config: dict, log: BoundLogger): + def __init__(self, config: dict, backup: RbdBackup, log: BoundLogger): + self.backup = backup self.filename = config["filename"] self.cow = config.get("cow", True) self.log = log.bind(filename=self.filename, subsystem="file") @@ -41,7 +46,7 @@ def ready(self) -> bool: return False return True - def backup(self, target: "backy.backends.BackyBackend") -> None: + def backup(self, target: "backy.rbd.chunked.ChunkedFileBackend") -> None: self.log.debug("backup") s = open(self.filename, "rb") parent = self.revision.get_parent() @@ -55,14 +60,14 @@ def backup(self, target: "backy.backends.BackyBackend") -> None: self.revision.stats["bytes_written"] = bytes - def verify(self, target: "backy.backends.BackyBackend") -> bool: + def verify(self, target: "backy.rbd.chunked.ChunkedFileBackend") -> bool: self.log.info("verify") s = open(self.filename, "rb") with s as source, target.open("rb") as target_: return files_are_equal( source, target_, - report=lambda s, t, o: self.revision.backup.quarantine.add_report( + report=lambda s, t, o: self.backup.quarantine.add_report( QuarantineReport(s, t, o) ), ) diff --git a/src/backy/rbd/sources/flyingcircus/source.py 
b/src/backy/rbd/sources/flyingcircus/source.py index bce8400a..fa8a0ce2 100644 --- a/src/backy/rbd/sources/flyingcircus/source.py +++ b/src/backy/rbd/sources/flyingcircus/source.py @@ -5,18 +5,20 @@ import consulate from structlog.stdlib import BoundLogger -from ...utils import TimeOut, TimeOutError +from backy.utils import TimeOut, TimeOutError + +from ... import RbdBackup from ..ceph.source import CephRBD class FlyingCircusRootDisk(CephRBD): snapshot_timeout = 90 - def __init__(self, config, log: BoundLogger): + def __init__(self, config, backup: RbdBackup, log: BoundLogger): self.config = config self.vm = config["vm"] self.consul_acl_token = config.get("consul_acl_token") - super(FlyingCircusRootDisk, self).__init__(config, log) + super(FlyingCircusRootDisk, self).__init__(config, backup, log) self.log = self.log.bind(vm=self.vm, subsystem="fc-disk") def create_snapshot(self, name: str) -> None: diff --git a/src/backy/rbd/sources/flyingcircus/tests/test_source.py b/src/backy/rbd/sources/flyingcircus/tests/test_source.py index ada47038..967030b5 100644 --- a/src/backy/rbd/sources/flyingcircus/tests/test_source.py +++ b/src/backy/rbd/sources/flyingcircus/tests/test_source.py @@ -5,8 +5,8 @@ import consulate import pytest -from backy.sources import select_source -from backy.sources.flyingcircus.source import FlyingCircusRootDisk +from backy.rbd.sources import select_source +from backy.rbd.sources.flyingcircus.source import FlyingCircusRootDisk @pytest.fixture diff --git a/src/backy/rbd/tests/conftest.py b/src/backy/rbd/tests/conftest.py new file mode 100644 index 00000000..73edb153 --- /dev/null +++ b/src/backy/rbd/tests/conftest.py @@ -0,0 +1,24 @@ +import json +import os + +import pytest + +from backy.rbd import RbdBackup + +fixtures = os.path.dirname(__file__) + "/tests/samples" + + +@pytest.fixture +def rbdbackup(schedule, tmp_path, log): + with open(str(tmp_path / "config"), "w", encoding="utf-8") as f: + json.dump( + { + "source": { + "type": "file", + "filename": "test", + }, + "schedule": schedule.to_dict(), + }, + f, + ) + return RbdBackup(tmp_path, log) diff --git a/smoketest.sh b/src/backy/rbd/tests/smoketest.sh similarity index 100% rename from smoketest.sh rename to src/backy/rbd/tests/smoketest.sh diff --git a/src/backy/rbd/tests/test_backup.py b/src/backy/rbd/tests/test_backup.py new file mode 100644 index 00000000..97f7e0a1 --- /dev/null +++ b/src/backy/rbd/tests/test_backup.py @@ -0,0 +1,66 @@ +import os.path +import subprocess +from unittest import mock + +from backy.rbd.sources.file import File +from backy.utils import CHUNK_SIZE + + +def test_config(rbdbackup, tmp_path): + assert rbdbackup.path == tmp_path + assert isinstance(rbdbackup.source, File) + assert rbdbackup.source.filename == "input-file" + + +def test_restore_target(rbdbackup): + source = "input-file" + target = "restore.img" + with open(source, "wb") as f: + f.write(b"volume contents\n") + rbdbackup.backup({"daily"}) + rbdbackup.restore("0", target) + with open(source, "rb") as s, open(target, "rb") as t: + assert s.read() == t.read() + + +def test_restore_stdout(rbdbackup, capfd): + source = "input-file" + with open(source, "wb") as f: + f.write(b"volume contents\n") + rbdbackup.backup({"daily"}) + rbdbackup.restore("0", "-") + assert not os.path.exists("-") + out, err = capfd.readouterr() + assert "volume contents\n" == out + + +def test_restore_backy_extract(rbdbackup, monkeypatch): + check_output = mock.Mock(return_value="backy-extract 1.1.0") + monkeypatch.setattr(subprocess, "check_output", 
check_output) + rbdbackup.restore_backy_extract = mock.Mock() + source = "input-file" + with open(source, "wb") as f: + f.write(b"a" * CHUNK_SIZE) + rbdbackup.backup({"daily"}) + rbdbackup.restore("0", "restore.img") + check_output.assert_called() + rbdbackup.restore_backy_extract.assert_called_once_with( + rbdbackup.find("0"), "restore.img" + ) + + +def test_backup_corrupted(rbdbackup): + source = "input-file" + with open(source, "wb") as f: + f.write(b"volume contents\n") + rbdbackup.backup({"daily"}) + + store = rbdbackup.history[0].backend.store + chunk_path = store.chunk_path(next(iter(store.seen))) + os.chmod(chunk_path, 0o664) + with open(chunk_path, "wb") as f: + f.write(b"invalid") + rbdbackup.backup({"daily"}) + + assert rbdbackup.history == [] + assert not os.path.exists(chunk_path) diff --git a/src/backy/tests/test_backy.py b/src/backy/rbd/tests/test_backy-rbd.py similarity index 89% rename from src/backy/tests/test_backy.py rename to src/backy/rbd/tests/test_backy-rbd.py index 5a1065c5..3f672943 100644 --- a/src/backy/tests/test_backy.py +++ b/src/backy/rbd/tests/test_backy-rbd.py @@ -3,8 +3,9 @@ import pytest -import backy.backup from backy.ext_deps import BACKY_CMD, BASH +from backy.rbd import RbdBackup +from backy.revision import Revision from backy.tests import Ellipsis @@ -37,14 +38,15 @@ def test_smoketest_internal(tmp_path, log): % source1 ).encode("utf-8") ) - backup = backy.backup.Backup(backup_dir, log) + backup = RbdBackup(backup_dir, log) # Backup first state - backup.backup({"manual:test"}) + rev1 = Revision.create(backup, {"manual:test"}, log) + backup.backup(rev1.uuid) - # Restore first state form newest revision at position 0 + # Restore first state from the newest revision restore_target = str(tmp_path / "image1.restore") - backup.restore("0", restore_target) + backup.restore(rev1.uuid, restore_target) with pytest.raises(IOError): open(backup.history[-1].filename, "wb") with pytest.raises(IOError): @@ -53,54 +55,57 @@ def test_smoketest_internal(tmp_path, log): # Backup second state backup.source.filename = source2 - backup.backup({"test"}, force=True) + rev2 = Revision.create(backup, {"test"}, log) + backup.backup(rev2.uuid) assert len(backup.history) == 2 # Restore second state from second backup which is the newest at position 0 - backup.restore("0", restore_target) + backup.restore(rev2.uuid, restore_target) d1 = open(source2, "rb").read() d2 = open(restore_target, "rb").read() assert d1 == d2 # Our original backup is now at position 1. Lets restore that again. - backup.restore("1", restore_target) + backup.restore(rev1.uuid, restore_target) assert open(source1, "rb").read() == open(restore_target, "rb").read() # Backup second state again backup.source.filename = source2 - backup.backup({"manual:test"}) + rev3 = Revision.create(backup, {"manual:test"}, log) + backup.backup(rev3.uuid) assert len(backup.history) == 3 # Restore image2 from its most recent at position 0 - backup.restore("0", restore_target) + backup.restore(rev3.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() # Restore image2 from its previous backup, now at position 1 - backup.restore("1", restore_target) + backup.restore(rev2.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() # Our original backup is now at position 2. Lets restore that again. 
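
The rewritten smoke test drives the new explicit-revision API: create a `Revision`, back it up by UUID, restore by UUID. Condensed to a usage sketch (the path and logger are illustrative; the directory needs an existing `config`):

    from pathlib import Path

    import structlog

    from backy.rbd import RbdBackup
    from backy.revision import Revision

    log = structlog.stdlib.get_logger()
    backup = RbdBackup(Path("/srv/backy/example"), log)
    rev = Revision.create(backup, {"manual:test"}, log)
    backup.backup(rev.uuid)                   # write a new revision
    backup.restore(rev.uuid, "restore.img")   # read it back out
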
- backup.restore("2", restore_target) + backup.restore(rev1.uuid, restore_target) assert open(source1, "rb").read() == open(restore_target, "rb").read() # Backup third state backup.source.filename = source3 - backup.backup({"test"}, True) + rev4 = Revision.create(backup, {"test"}, log) + backup.backup(rev4.uuid) assert len(backup.history) == 4 # Restore image3 from the most curent state - backup.restore("0", restore_target) + backup.restore(rev4.uuid, restore_target) assert open(source3, "rb").read() == open(restore_target, "rb").read() # Restore image2 from position 1 and 2 - backup.restore("1", restore_target) + backup.restore(rev3.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() - backup.restore("2", restore_target) + backup.restore(rev2.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() # Restore image1 from position 3 - backup.restore("3", restore_target) + backup.restore(rev1.uuid, restore_target) assert open(source1, "rb").read() == open(restore_target, "rb").read() diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py new file mode 100644 index 00000000..075da650 --- /dev/null +++ b/src/backy/rbd/tests/test_main.py @@ -0,0 +1,213 @@ +import datetime +import os +import pprint +import sys +from functools import partialmethod + +import pytest + +import backy.rbd +from backy import utils +from backy.rbd import main +from backy.tests import Ellipsis + + +@pytest.fixture +def argv(): + original = sys.argv + new = original[:1] + sys.argv = new + yield new + sys.argv = original + + +def test_display_usage(capsys, argv): + with pytest.raises(SystemExit) as exit: + main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + """\ +usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] + {client,backup,restore,purge,find,status,\ +upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} + ... +""" + == out + ) + assert err == "" + + +def test_display_client_usage(capsys, argv): + argv.append("client") + with pytest.raises(SystemExit) as exit: + main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + """\ +usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] + {jobs,status,run,runall,reload,check} ... +""" + == out + ) + assert err == "" + + +def test_display_help(capsys, argv): + argv.append("--help") + with pytest.raises(SystemExit) as exit: + main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + Ellipsis( + """\ +usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] + {client,backup,restore,purge,find,status,\ +upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} + ... + +Backup and restore for block devices. + +positional arguments: +... +""" + ) + == out + ) + assert err == "" + + +def test_display_client_help(capsys, argv): + argv.extend(["client", "--help"]) + with pytest.raises(SystemExit) as exit: + main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + Ellipsis( + """\ +usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] + {jobs,status,run,runall,reload,check} ... + +positional arguments: +... +""" + ) + == out + ) + assert err == "" + + +def test_verbose_logging(capsys, argv): + # This is just a smoke test to ensure the appropriate code path + # for -v is covered. 
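
The `argv` fixture in the new `test_main.py` above is the usual save-and-restore dance around `sys.argv` so each test can drive `main()` with its own command line. The same pattern as a context manager, for reference:

    import sys
    from contextlib import contextmanager

    @contextmanager
    def fake_argv(*args: str):
        # Keep argv[0], replace the rest, restore on exit -- the same
        # save/restore the fixture above performs.
        original = sys.argv
        sys.argv = original[:1] + list(args)
        try:
            yield
        finally:
            sys.argv = original

    # e.g. with fake_argv("-v", "backup", "manual:test"): main()
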
+ argv.extend(["-v"]) + with pytest.raises(SystemExit) as exit: + main() + assert exit.value.code == 0 + + +def print_args(*args, return_value=None, **kw): + print(args) + pprint.pprint(kw) + return return_value + + +async def async_print_args(*args, **kw): + print_args(*args, **kw) + + +@pytest.mark.parametrize("success", [False, True]) +def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): + os.makedirs(tmp_path / "backy") + os.chdir(tmp_path / "backy") + + with open(tmp_path / "backy" / "config", "wb") as f: + f.write( + """ +--- +schedule: + daily: + interval: 1d + keep: 7 +source: + type: file + filename: {} +""".format( + __file__ + ).encode( + "utf-8" + ) + ) + + monkeypatch.setattr( + backy.rbd.RbdBackup, + "backup", + partialmethod(print_args, return_value=success), + ) + argv.extend(["-v", "backup", "manual:test"]) + utils.log_data = "" + with pytest.raises(SystemExit) as exit: + main() + out, err = capsys.readouterr() + assert ( + Ellipsis( + """\ +(, {'manual:test'}, False) +{} +""" + ) + == out + ) + assert ( + Ellipsis( + f"""\ +... D command/invoked args='... -v backup manual:test' +... D command/parsed func='backup' func_args={{'force': False, 'tags': 'manual:test'}} +... D quarantine/scan entries=0 +... D command/return-code code={int(not success)} +""" + ) + == utils.log_data + ) + assert exit.value.code == int(not success) + + +# TODO: test call restore, verify, gc +def test_call_unexpected_exception( + capsys, backup, argv, monkeypatch, log, tmp_path +): + def do_raise(*args, **kw): + raise RuntimeError("test") + + monkeypatch.setattr(backy.rbd.RbdBackup, "gc", do_raise) + import os + + monkeypatch.setattr(os, "_exit", lambda x: None) + + argv.extend( + ["-l", str(tmp_path / "backy.log"), "-b", str(backup.path), "gc"] + ) + utils.log_data = "" + with pytest.raises(SystemExit): + main() + out, err = capsys.readouterr() + assert "" == out + assert ( + Ellipsis( + """\ +... D command/invoked args='... -l ... -b ... status' +... D command/parsed func='status' func_args={'yaml_': False, 'revision': 'all'} +... 
E command/failed exception_class='builtins.RuntimeError' exception_msg='test' +exception>\tTraceback (most recent call last): +exception>\t File ".../src/backy/main.py", line ..., in main +exception>\t ret = func(**func_args) +exception>\t File ".../src/backy/tests/test_main.py", line ..., in do_raise +exception>\t raise RuntimeError("test") +exception>\tRuntimeError: test +""" + ) + == utils.log_data + ) diff --git a/src/backy/tests/test_quarantine.py b/src/backy/rbd/tests/test_quarantine.py similarity index 94% rename from src/backy/tests/test_quarantine.py rename to src/backy/rbd/tests/test_quarantine.py index ddf9da71..14af2ee2 100644 --- a/src/backy/tests/test_quarantine.py +++ b/src/backy/rbd/tests/test_quarantine.py @@ -1,4 +1,4 @@ -from backy.quarantine import QuarantineReport, QuarantineStore +from backy.rbd.quarantine import QuarantineReport, QuarantineStore from backy.tests import Ellipsis diff --git a/src/backy/tests/test_source.py b/src/backy/rbd/tests/test_source.py similarity index 78% rename from src/backy/tests/test_source.py rename to src/backy/rbd/tests/test_source.py index 7f254801..b4429579 100644 --- a/src/backy/tests/test_source.py +++ b/src/backy/rbd/tests/test_source.py @@ -1,5 +1,5 @@ -from backy.backup import Backup -from backy.sources.ceph.source import CephRBD +from backy.rbd import RbdBackup +from backy.rbd.sources.ceph.source import CephRBD def test_configure_ceph_source(tmp_path, log): @@ -17,7 +17,7 @@ def test_configure_ceph_source(tmp_path, log): image: test04 """ ) - backup = Backup(tmp_path, log) + backup = RbdBackup(tmp_path, log) assert isinstance(backup.source, CephRBD) assert backup.source.pool == "test" assert backup.source.image == "test04" diff --git a/src/backy/revision.py b/src/backy/revision.py index 10c56772..b9648ca1 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -1,18 +1,16 @@ import datetime from enum import Enum from pathlib import Path -from typing import IO, TYPE_CHECKING, Literal, Optional +from typing import TYPE_CHECKING, Optional import shortuuid import yaml from structlog.stdlib import BoundLogger from . import utils -from .backends import select_backend from .utils import SafeFile if TYPE_CHECKING: - from .backends import BackyBackend from .backup import Backup @@ -92,8 +90,6 @@ def from_dict(cls, metadata, backup, log): r.server = metadata.get("server", "") # Assume trusted by default to support migration r.trust = Trust(metadata.get("trust", Trust.TRUSTED.value)) - # If the metadata does not show the backend type, then it's cowfile. 
- r.backend_type = metadata.get("backend_type", "cowfile") return r @property diff --git a/src/backy/tests/samples/sample1.rev b/src/backy/tests/samples/sample1.rev deleted file mode 100644 index a980413f..00000000 --- a/src/backy/tests/samples/sample1.rev +++ /dev/null @@ -1,5 +0,0 @@ ---- - uuid: asdf - type: full - timestamp: 2015-08-01T20:00:00+00:00 - parent: diff --git a/src/backy/tests/samples/sample2.rev b/src/backy/tests/samples/sample2.rev deleted file mode 100644 index 523fadcc..00000000 --- a/src/backy/tests/samples/sample2.rev +++ /dev/null @@ -1,4 +0,0 @@ ---- - uuid: asdf2 - timestamp: 2015-08-01T21:00:00+00:00 - parent: asdf diff --git a/src/backy/tests/samples/simple_file/config b/src/backy/tests/samples/simple_file/config deleted file mode 100644 index 46d5cda4..00000000 --- a/src/backy/tests/samples/simple_file/config +++ /dev/null @@ -1,8 +0,0 @@ ---- -schedule: - daily: - interval: 1d - keep: 7 -source: - type: file - filename: input-file diff --git a/src/backy/tests/test_archive.py b/src/backy/tests/test_archive.py deleted file mode 100644 index 232d4e58..00000000 --- a/src/backy/tests/test_archive.py +++ /dev/null @@ -1,194 +0,0 @@ -import shutil - -import pytest - - -@pytest.fixture -def backup_with_revisions(backup, tmp_path): - with open(str(tmp_path / "123-0.rev"), "wb") as f: - f.write( - b"""\ -uuid: 123-0 -timestamp: 2015-08-29 00:00:00+00:00 -parent: -trust: verified -stats: {bytes_written: 14868480, duration: 31.1} -tags: [daily, weekly, monthly] -""" - ) - with open(str(tmp_path / "123-1.rev"), "wb") as f: - f.write( - b"""\ -uuid: 123-1 -timestamp: 2015-08-30 01:00:00+00:00 -parent: 123-0 -stats: {bytes_written: 1486880, duration: 3.7} -server: remote1 -tags: [daily, weekly] -""" - ) - with open(str(tmp_path / "123-2.rev"), "wb") as f: - f.write( - b"""\ -uuid: 123-2 -timestamp: 2015-08-30 02:00:00+00:00 -parent: 123-1 -stats: {} -server: remote1 -tags: [daily] -""" - ) - backup.scan() - return backup - - -def test_empty_revisions(backup): - assert backup.history == [] - - -def test_find_revision_empty(backup): - with pytest.raises(KeyError): - backup.find("-1") - with pytest.raises(KeyError): - backup.find("last") - with pytest.raises(KeyError): - backup.find("fdasfdka") - - -def test_load_revisions(backup_with_revisions): - a = backup_with_revisions - assert [x.uuid for x in a.history] == ["123-0", "123-1", "123-2"] - assert a.history[0].get_parent() is None - assert a.history[1].get_parent() is None - assert a.history[2].get_parent().uuid == "123-1" - - -def test_find_revisions(backup_with_revisions): - a = backup_with_revisions - assert a.find_revisions("all") == a.history - assert a.find_revisions("1") == [a.find("1")] - assert a.find_revisions("tag:dail") == [] - assert a.find_revisions("trust:verified") == [a.find("123-0")] - assert a.find_revisions("2..1") == [a.find("2"), a.find("1")] - assert a.find_revisions("1..2") == [a.find("2"), a.find("1")] - assert a.find_revisions("123-0..123-1") == [ - a.find("123-0"), - a.find("123-1"), - ] - assert a.find_revisions("last(tag:daily)..123-1") == [ - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("123-1..") == [a.find("123-1"), a.find("123-2")] - assert a.find_revisions("..") == a.history - assert a.find_revisions("first..last") == a.history - assert a.find_revisions("tag:weekly") == [a.find("123-0"), a.find("123-1")] - assert a.find_revisions("1, tag:weekly") == [ - a.find("123-1"), - a.find("123-0"), - ] - assert a.find_revisions("0,2..1") == [ - a.find("123-2"), - a.find("123-0"), - 
a.find("123-1"), - ] - assert a.find_revisions("2,1, 2,0,1") == [ - a.find("123-0"), - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("2015-09-01..2015-08-30") == [ - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("2015-08-30..last(last(tag:daily&clean))") == [ - a.find("123-1"), - ] - assert a.find_revisions("2015-08-30..,trust:verified") == [ - a.find("123-1"), - a.find("123-2"), - a.find("123-0"), - ] - assert a.find_revisions( - "first(trust:verified)..last(reverse(2015-08-30..))" - ) == [ - a.find("123-0"), - a.find("123-1"), - ] - assert a.find_revisions("reverse(not(clean))") == [ - a.find("123-2"), - ] - assert a.find_revisions("last(reverse(first(123-1, 123-0)))") == [ - a.find("123-1"), - ] - assert a.find_revisions("( (first( (123-0, 123-1)) ))") == [ - a.find("123-0"), - ] - assert a.find_revisions("server:aaaa") == [] - assert a.find_revisions("server:remote1") == [ - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("local") == [ - a.find("123-0"), - ] - assert a.find_revisions("remote") == [ - a.find("123-1"), - a.find("123-2"), - ] - - -def test_find_revisions_should_raise_invalid_spec(backup_with_revisions): - a = backup_with_revisions - with pytest.raises(KeyError): - a.find_revisions("aaaa..125") - with pytest.raises(AssertionError): - a.find_revisions("last)..5") - with pytest.raises(KeyError): - a.find_revisions("clean-..,1") - with pytest.raises(KeyError): - a.find_revisions("123-") - with pytest.raises(IndexError): - a.find_revisions("first(not(all))") - with pytest.raises(KeyError): - a.find_revisions("2015-09..2015-08-30") - - -def test_find_revision(backup_with_revisions): - a = backup_with_revisions - assert a.find("last").uuid == "123-2" - with pytest.raises(KeyError): - a.find("-1") - assert a.find("0").uuid == "123-2" - assert a.find("1").uuid == "123-1" - assert a.find("2").uuid == "123-0" - - assert a.find("123-1").uuid == "123-1" - with pytest.raises(KeyError): - a.find("125-125") - - assert a.find("last(tag:daily)").uuid == "123-2" - assert a.find("last(tag:weekly)").uuid == "123-1" - assert a.find("last(tag:monthly)").uuid == "123-0" - assert a.find(" first( tag:monthly ) ").uuid == "123-0" - - -def test_get_history(backup_with_revisions): - assert 2 == len(backup_with_revisions.clean_history) - assert ( - backup_with_revisions.clean_history - == backup_with_revisions.get_history(clean=True) - ) - assert 1 == len(backup_with_revisions.local_history) - assert ( - backup_with_revisions.local_history - == backup_with_revisions.get_history(local=True) - ) - assert 1 == len(backup_with_revisions.get_history(clean=True, local=True)) - - -def test_ignore_duplicates(backup_with_revisions, tmp_path): - shutil.copy(str(tmp_path / "123-2.rev"), str(tmp_path / "123-3.rev")) - a = backup_with_revisions - a.scan() - assert 3 == len(a.history) diff --git a/src/backy/tests/test_backup.py b/src/backy/tests/test_backup.py index e2f68b21..3a325684 100644 --- a/src/backy/tests/test_backup.py +++ b/src/backy/tests/test_backup.py @@ -1,21 +1,199 @@ -import os.path -import subprocess -from unittest import mock +import shutil import pytest -import backy.utils from backy.revision import Revision -from backy.sources.file import File -from backy.utils import CHUNK_SIZE -def test_config(simple_file_config, tmp_path): - backup = simple_file_config +@pytest.fixture +def backup_with_revisions(backup, tmp_path): + with open(str(tmp_path / "123-0.rev"), "wb") as f: + f.write( + b"""\ +uuid: 123-0 +timestamp: 2015-08-29 00:00:00+00:00 
+parent: +trust: verified +stats: {bytes_written: 14868480, duration: 31.1} +tags: [daily, weekly, monthly] +""" + ) + with open(str(tmp_path / "123-1.rev"), "wb") as f: + f.write( + b"""\ +uuid: 123-1 +timestamp: 2015-08-30 01:00:00+00:00 +parent: 123-0 +stats: {bytes_written: 1486880, duration: 3.7} +server: remote1 +tags: [daily, weekly] +""" + ) + with open(str(tmp_path / "123-2.rev"), "wb") as f: + f.write( + b"""\ +uuid: 123-2 +timestamp: 2015-08-30 02:00:00+00:00 +parent: 123-1 +stats: {} +server: remote1 +tags: [daily] +""" + ) + backup.scan() + return backup + - assert backup.path == tmp_path - assert isinstance(backup.source, File) - assert backup.source.filename == "input-file" +def test_empty_revisions(backup): + assert backup.history == [] + + +def test_find_revision_empty(backup): + with pytest.raises(KeyError): + backup.find("-1") + with pytest.raises(KeyError): + backup.find("last") + with pytest.raises(KeyError): + backup.find("fdasfdka") + + +def test_load_revisions(backup_with_revisions): + a = backup_with_revisions + assert [x.uuid for x in a.history] == ["123-0", "123-1", "123-2"] + assert a.history[0].get_parent() is None + assert a.history[1].get_parent() is None + assert a.history[2].get_parent().uuid == "123-1" + + +def test_find_revisions(backup_with_revisions): + a = backup_with_revisions + assert a.find_revisions("all") == a.history + assert a.find_revisions("1") == [a.find("1")] + assert a.find_revisions("tag:dail") == [] + assert a.find_revisions("trust:verified") == [a.find("123-0")] + assert a.find_revisions("2..1") == [a.find("2"), a.find("1")] + assert a.find_revisions("1..2") == [a.find("2"), a.find("1")] + assert a.find_revisions("123-0..123-1") == [ + a.find("123-0"), + a.find("123-1"), + ] + assert a.find_revisions("last(tag:daily)..123-1") == [ + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("123-1..") == [a.find("123-1"), a.find("123-2")] + assert a.find_revisions("..") == a.history + assert a.find_revisions("first..last") == a.history + assert a.find_revisions("tag:weekly") == [a.find("123-0"), a.find("123-1")] + assert a.find_revisions("1, tag:weekly") == [ + a.find("123-1"), + a.find("123-0"), + ] + assert a.find_revisions("0,2..1") == [ + a.find("123-2"), + a.find("123-0"), + a.find("123-1"), + ] + assert a.find_revisions("2,1, 2,0,1") == [ + a.find("123-0"), + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("2015-09-01..2015-08-30") == [ + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("2015-08-30..last(last(tag:daily&clean))") == [ + a.find("123-1"), + ] + assert a.find_revisions("2015-08-30..,trust:verified") == [ + a.find("123-1"), + a.find("123-2"), + a.find("123-0"), + ] + assert a.find_revisions( + "first(trust:verified)..last(reverse(2015-08-30..))" + ) == [ + a.find("123-0"), + a.find("123-1"), + ] + assert a.find_revisions("reverse(not(clean))") == [ + a.find("123-2"), + ] + assert a.find_revisions("last(reverse(first(123-1, 123-0)))") == [ + a.find("123-1"), + ] + assert a.find_revisions("( (first( (123-0, 123-1)) ))") == [ + a.find("123-0"), + ] + assert a.find_revisions("server:aaaa") == [] + assert a.find_revisions("server:remote1") == [ + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("local") == [ + a.find("123-0"), + ] + assert a.find_revisions("remote") == [ + a.find("123-1"), + a.find("123-2"), + ] + + +def test_find_revisions_should_raise_invalid_spec(backup_with_revisions): + a = backup_with_revisions + with pytest.raises(KeyError): + 
a.find_revisions("aaaa..125") + with pytest.raises(AssertionError): + a.find_revisions("last)..5") + with pytest.raises(KeyError): + a.find_revisions("clean-..,1") + with pytest.raises(KeyError): + a.find_revisions("123-") + with pytest.raises(IndexError): + a.find_revisions("first(not(all))") + with pytest.raises(KeyError): + a.find_revisions("2015-09..2015-08-30") + + +def test_find_revision(backup_with_revisions): + a = backup_with_revisions + assert a.find("last").uuid == "123-2" + with pytest.raises(KeyError): + a.find("-1") + assert a.find("0").uuid == "123-2" + assert a.find("1").uuid == "123-1" + assert a.find("2").uuid == "123-0" + + assert a.find("123-1").uuid == "123-1" + with pytest.raises(KeyError): + a.find("125-125") + + assert a.find("last(tag:daily)").uuid == "123-2" + assert a.find("last(tag:weekly)").uuid == "123-1" + assert a.find("last(tag:monthly)").uuid == "123-0" + assert a.find(" first( tag:monthly ) ").uuid == "123-0" + + +def test_get_history(backup_with_revisions): + assert 2 == len(backup_with_revisions.clean_history) + assert ( + backup_with_revisions.clean_history + == backup_with_revisions.get_history(clean=True) + ) + assert 1 == len(backup_with_revisions.local_history) + assert ( + backup_with_revisions.local_history + == backup_with_revisions.get_history(local=True) + ) + assert 1 == len(backup_with_revisions.get_history(clean=True, local=True)) + + +def test_ignore_duplicates(backup_with_revisions, tmp_path): + shutil.copy(str(tmp_path / "123-2.rev"), str(tmp_path / "123-3.rev")) + a = backup_with_revisions + a.scan() + assert 3 == len(a.history) def test_find(simple_file_config, tmp_path, log): @@ -33,86 +211,3 @@ def test_find_should_raise_if_not_found(simple_file_config, log): backup.scan() with pytest.raises(KeyError): backup.find("no such revision") - - -def test_restore_target(simple_file_config): - backup = simple_file_config - source = "input-file" - target = "restore.img" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) - backup.restore("0", target) - with open(source, "rb") as s, open(target, "rb") as t: - assert s.read() == t.read() - - -def test_restore_stdout(simple_file_config, capfd): - backup = simple_file_config - source = "input-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) - backup.restore("0", "-") - assert not os.path.exists("-") - out, err = capfd.readouterr() - assert "volume contents\n" == out - - -def test_restore_backy_extract(simple_file_config, monkeypatch): - check_output = mock.Mock(return_value="backy-extract 1.1.0") - monkeypatch.setattr(subprocess, "check_output", check_output) - backup = simple_file_config - backup.restore_backy_extract = mock.Mock() - source = "input-file" - with open(source, "wb") as f: - f.write(b"a" * CHUNK_SIZE) - backup.backup({"daily"}) - backup.restore("0", "restore.img") - check_output.assert_called() - backup.restore_backy_extract.assert_called_once_with( - backup.find("0"), "restore.img" - ) - - -def test_backup_corrupted(simple_file_config): - backup = simple_file_config - source = "input-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) - - store = backup.history[0].backend.store - chunk_path = store.chunk_path(next(iter(store.seen))) - os.chmod(chunk_path, 0o664) - with open(chunk_path, "wb") as f: - f.write(b"invalid") - backup.backup({"daily"}) - - assert backup.history == [] - assert not os.path.exists(chunk_path) - - -def 
test_restore_mixed_backend(simple_file_config): - backup = simple_file_config - backup.default_backend_type = "cowfile" - source = "input-file" - out = "output-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) - - with open(source, "wb") as f: - f.write(b"meow\n") - backup.default_backend_type = "chunked" - backup.backup({"daily"}) - - assert len(backup.history) == 2 - - backup.restore("1", out) - with open(out, "rb") as f: - assert f.read() == b"volume contents\n" - - backup.restore("0", out) - with open(out, "rb") as f: - assert f.read() == b"meow\n" diff --git a/src/backy/tests/test_fallocate.py b/src/backy/tests/test_fallocate.py deleted file mode 100644 index a6db905d..00000000 --- a/src/backy/tests/test_fallocate.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest - -import backy.fallocate - - -@pytest.fixture -def testfile(tmp_path): - fn = str(tmp_path / "myfile") - with open(fn, "wb") as f: - f.write(b"\xde\xad\xbe\xef" * 32) - return fn - - -def test_punch_hole(testfile): - with open(testfile, "r+b") as f: - f.seek(0) - backy.fallocate.punch_hole(f, 2, 4) - f.seek(0) - assert f.read(8) == b"\xde\xad\x00\x00\x00\x00\xbe\xef" - - -def test_punch_hole_needs_length(testfile): - with pytest.raises(IOError): - with open(testfile, "r+b") as f: - backy.fallocate.punch_hole(f, 10, 0) - - -def test_punch_hole_needs_writable_file(testfile): - with pytest.raises(OSError): - with open(testfile, "rb") as f: - backy.fallocate.punch_hole(f, 0, 1) - - -def test_punch_hole_needs_nonnegative_offset(testfile): - with pytest.raises(OSError): - with open(testfile, "r+b") as f: - backy.fallocate.punch_hole(f, -1, 1) - - -def test_fake_fallocate_only_punches_holes(testfile): - with pytest.raises(NotImplementedError): - with open(testfile, "r+b") as f: - backy.fallocate._fake_fallocate(f, 0, 0, 10) diff --git a/src/backy/tests/test_timeout.py b/src/backy/tests/test_timeout.py deleted file mode 100644 index ceaaea51..00000000 --- a/src/backy/tests/test_timeout.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from backy.utils import TimeOut, TimeOutError - - -def test_timeout(capsys): - timeout = TimeOut(0.05, 0.01) - while timeout.tick(): - print("tick") - assert timeout.timed_out - out, err = capsys.readouterr() - assert "tick\ntick\ntick" in out - - -def test_raise_on_timeout(capsys): - timeout = TimeOut(0.05, 0.01, raise_on_timeout=True) - with pytest.raises(TimeOutError): - while True: - timeout.tick() - print("tick") - out, err = capsys.readouterr() - assert "tick\ntick\ntick" in out diff --git a/src/backy/tests/test_utils.py b/src/backy/tests/test_utils.py index 91e04723..f0858338 100644 --- a/src/backy/tests/test_utils.py +++ b/src/backy/tests/test_utils.py @@ -12,6 +12,10 @@ copy_overwrite, files_are_equal, files_are_roughly_equal, + punch_hole, + TimeOut, + TimeOutError, + _fake_fallocate, ) @@ -337,3 +341,62 @@ def test_unmocked_now_returns_time_time_float(): now = backy.utils.now() after = datetime.datetime.now(ZoneInfo("UTC")) assert before <= now <= after + + + +@pytest.fixture +def testfile(tmp_path): + fn = str(tmp_path / "myfile") + with open(fn, "wb") as f: + f.write(b"\xde\xad\xbe\xef" * 32) + return fn + + +def test_punch_hole(testfile): + with open(testfile, "r+b") as f: + f.seek(0) + punch_hole(f, 2, 4) + f.seek(0) + assert f.read(8) == b"\xde\xad\x00\x00\x00\x00\xbe\xef" + + +def test_punch_hole_needs_length(testfile): + with pytest.raises(IOError): + with open(testfile, "r+b") as f: + punch_hole(f, 10, 0) + + +def 
test_punch_hole_needs_writable_file(testfile): + with pytest.raises(OSError): + with open(testfile, "rb") as f: + punch_hole(f, 0, 1) + + +def test_punch_hole_needs_nonnegative_offset(testfile): + with pytest.raises(OSError): + with open(testfile, "r+b") as f: + punch_hole(f, -1, 1) + + +def test_fake_fallocate_only_punches_holes(testfile): + with pytest.raises(NotImplementedError): + with open(testfile, "r+b") as f: + _fake_fallocate(f, 0, 0, 10) + +def test_timeout(capsys): + timeout = TimeOut(0.05, 0.01) + while timeout.tick(): + print("tick") + assert timeout.timed_out + out, err = capsys.readouterr() + assert "tick\ntick\ntick" in out + + +def test_raise_on_timeout(capsys): + timeout = TimeOut(0.05, 0.01, raise_on_timeout=True) + with pytest.raises(TimeOutError): + while True: + timeout.tick() + print("tick") + out, err = capsys.readouterr() + assert "tick\ntick\ntick" in out From 4ae4021635acbb92d929067832e3bef1d7ac99fe Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Tue, 25 Jun 2024 17:54:40 +0200 Subject: [PATCH 07/25] snapshot: clean up client package --- src/backy/cli/__init__.py | 297 ++++++++++++++++++-------------------- src/backy/cli/client.py | 9 +- 2 files changed, 140 insertions(+), 166 deletions(-) diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index 32943346..ff85118a 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -18,10 +18,11 @@ import backy.daemon from backy import logging -from backy.backup import Backup, RestoreBackend -from backy.utils import format_datetime_local, generate_taskid +from backy.backup import Backup -from .client import APIClient, CLIClient +# XXX invert this dependency +from backy.rbd.backup import RestoreBackend +from backy.utils import format_datetime_local, generate_taskid class Command(object): @@ -151,70 +152,6 @@ def verify(self, revision: str) -> None: b = Backup(self.path, self.log) b.verify(revision) - def client( - self, - config: Path, - peer: str, - url: str, - token: str, - apifunc: str, - **kwargs, - ) -> int: - async def run() -> int: - if peer and (url or token): - self.log.error( - "client-argparse-error", - _fmt_msg="--peer conflicts with --url and --token", - ) - return 1 - if bool(url) ^ bool(token): - self.log.error( - "client-argparse-error", - _fmt_msg="--url and --token require each other", - ) - return 1 - if url and token: - api = APIClient("", url, token, self.taskid, self.log) - else: - d = backy.daemon.BackyDaemon(config, self.log) - d._read_config() - if peer: - if peer not in d.peers: - self.log.error( - "client-peer-unknown", - _fmt_msg="The peer {peer} is not known. " - "Select a known peer or specify --url and " - "--token.\n" - "The following peers are known: {known}", - peer=peer, - known=", ".join(d.peers.keys()), - ) - return 1 - api = APIClient.from_conf( - peer, d.peers[peer], self.taskid, self.log - ) - else: - if "token" not in d.api_cli_default: - self.log.error( - "client-missing-defaults", - _fmt_msg="The config file is missing default " - "parameters. 
Please specify --url and --token", - ) - return 1 - api = APIClient.from_conf( - "", d.api_cli_default, self.taskid, self.log - ) - async with CLIClient(api, self.log) as c: - try: - await getattr(c, apifunc)(**kwargs) - except ClientConnectionError: - c.log.error("connection-error", exc_style="banner") - c.log.debug("connection-error", exc_info=True) - return 1 - return 0 - - return asyncio.run(run()) - def tags( self, action: Literal["set", "add", "remove"], @@ -247,55 +184,142 @@ def expire(self) -> None: b.expire() b.warn_pending_changes() + def jobs(self, filter_re=""): + """List status of all known jobs. Optionally filter by regex.""" + + tz = format_datetime_local(None)[1] + + t = Table( + "Job", + "SLA", + "SLA overdue", + "Status", + f"Last Backup ({tz})", + "Last Tags", + Column("Last Duration", justify="right"), + f"Next Backup ({tz})", + "Next Tags", + ) + + jobs = await self.api.fetch_status(filter_re) + jobs.sort(key=lambda j: j["job"]) + for job in jobs: + overdue = ( + humanize.naturaldelta(job["sla_overdue"]) + if job["sla_overdue"] + else "-" + ) + last_duration = ( + humanize.naturaldelta(job["last_duration"]) + if job["last_duration"] + else "-" + ) + last_time = format_datetime_local(job["last_time"])[0] + next_time = format_datetime_local(job["next_time"])[0] + + t.add_row( + job["job"], + job["sla"], + overdue, + job["status"], + last_time, + job["last_tags"], + last_duration, + next_time, + job["next_tags"], + ) + backups = await self.api.list_backups() + if filter_re: + backups = list(filter(re.compile(filter_re).search, backups)) + for b in backups: + t.add_row(b, "-", "-", "Dead", "-", "", "-", "-", "") -def setup_argparser(): - return parser, client + rprint(t) + print("{} jobs shown".format(len(jobs) + len(backups))) + + def status(self): + """Show job status overview""" + t = Table("Status", "#") + state_summary: Dict[str, int] = {} + jobs = await self.api.get_jobs() + jobs += [{"status": "Dead"} for _ in await self.api.list_backups()] + for job in jobs: + state_summary.setdefault(job["status"], 0) + state_summary[job["status"]] += 1 + + for state in sorted(state_summary): + t.add_row(state, str(state_summary[state])) + rprint(t) + + def run(self, job: str): + """Trigger immediate run for one job""" + try: + await self.api.run_job(job) + except ClientResponseError as e: + if e.status == HTTPNotFound.status_code: + self.log.error("unknown-job", job=job) + sys.exit(1) + raise + self.log.info("triggered-run", job=job) + + def runall(self): + """Trigger immediate run for all jobs""" + jobs = await self.api.get_jobs() + for job in jobs: + await self.run(job["name"]) + + def reload(self): + """Reload the configuration.""" + self.log.info("reloading-daemon") + await self.api.reload_daemon() + self.log.info("reloaded-daemon") + + def check(self): + status = await self.api.fetch_status() + + exitcode = 0 + + for job in status: + log = self.log.bind(job_name=job["job"]) + if job["manual_tags"]: + log.info( + "check-manual-tags", + manual_tags=job["manual_tags"], + ) + if job["unsynced_revs"]: + self.log.info( + "check-unsynced-revs", unsynced_revs=job["unsynced_revs"] + ) + if job["sla"] != "OK": + log.critical( + "check-sla-violation", + last_time=str(job["last_time"]), + sla_overdue=job["sla_overdue"], + ) + exitcode = max(exitcode, 2) + if job["quarantine_reports"]: + log.warning( + "check-quarantined", reports=job["quarantine_reports"] + ) + exitcode = max(exitcode, 1) + + self.log.info("check-exit", exitcode=exitcode, jobs=len(status)) + raise SystemExit(exitcode) 
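
# ---------------------------------------------------------------------------
# Editorial sketch, not part of the upstream patch: the methods above still
# ``await`` coroutines on ``self.api`` (e.g. ``fetch_status()``) from plain
# ``def`` methods, so at this snapshot stage they need to become ``async``
# and be driven by an event loop -- the removed client() wrapper used
# asyncio.run() for exactly that. A minimal, hypothetical stand-alone
# version of the check logic, where ``api`` stands in for an APIClient-like
# object whose async fetch_status() returns the job dicts used by check():
import asyncio


async def check_once(api) -> int:
    """Derive an exit code the way Command.check() above does."""
    exitcode = 0
    for job in await api.fetch_status():
        if job["sla"] != "OK":
            exitcode = max(exitcode, 2)  # SLA violation is the most severe
        if job["quarantine_reports"]:
            exitcode = max(exitcode, 1)  # quarantined data only warns
    return exitcode


class _StubAPI:
    """Hypothetical stand-in so the sketch runs without a daemon."""

    async def fetch_status(self):
        return [{"sla": "OK", "quarantine_reports": []}]


if __name__ == "__main__":
    print(asyncio.run(check_once(_StubAPI())))  # prints 0
# ---------------------------------------------------------------------------
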
def main(): parser = argparse.ArgumentParser( description="Backy command line client.", ) - parser.add_argument( "-v", "--verbose", action="store_true", help="verbose output" ) - - parser.add_argument( - "-l", - "--logfile", - type=Path, - help=( - "file name to write log output in. " - "(default: /var/log/backy.log for `scheduler`, " - "ignored for `client`, $backupdir/backy.log otherwise)" - ), - ) - parser.add_argument( - "-b", - "--backupdir", - default=".", - type=Path, - help=( - "directory where backups and logs are written to " - "(default: %(default)s)" - ), - ) parser.add_argument( "-t", "--taskid", default=generate_taskid(), help="id to include in log messages (default: 4 random base32 chars)", ) - - subparsers = parser.add_subparsers() - - # CLIENT - client = subparsers.add_parser( - "client", - help="Query the api", - ) - g = client.add_argument_group() g.add_argument( "-c", "--config", @@ -303,15 +327,10 @@ def main(): default="/etc/backy.conf", help="(default: %(default)s)", ) - g.add_argument("-p", "--peer", help="(default: read from config file)") - g = client.add_argument_group() - g.add_argument("--url") - g.add_argument("--token") - client.set_defaults(func="client") - client_parser = client.add_subparsers() - # CLIENT jobs - p = client_parser.add_parser("jobs", help="List status of all known jobs") + subparsers = parser.add_subparsers() + + p = subparsers.add_parser("jobs", help="List status of all known jobs") p.add_argument( "filter_re", default="", @@ -319,37 +338,26 @@ def main(): nargs="?", help="Optional job filter regex", ) - p.set_defaults(apifunc="jobs") + p.set_defaults(func="jobs") - # CLIENT status - p = client_parser.add_parser("status", help="Show job status overview") - p.set_defaults(apifunc="status") + p = subparsers.add_parser("status", help="Show job status overview") + p.set_defaults(func="status") - # CLIENT run - p = client_parser.add_parser( - "run", help="Trigger immediate run for one job" - ) + p = subparsers.add_parser("run", help="Trigger immediate run for one job") p.add_argument("job", metavar="", help="Name of the job to run") - p.set_defaults(apifunc="run") + p.set_defaults(func="run") - # CLIENT runall - p = client_parser.add_parser( + p = subparsers.add_parser( "runall", help="Trigger immediate run for all jobs" ) - p.set_defaults(apifunc="runall") - - # CLIENT reload - p = client_parser.add_parser("reload", help="Reload the configuration") - p.set_defaults(apifunc="reload") + p.set_defaults(func="runall") - # CLIENT check - p = client_parser.add_parser( + p = subparsers.add_parser( "check", help="Check whether all jobs adhere to their schedules' SLA", ) - p.set_defaults(apifunc="check") + p.set_defaults(func="check") - # BACKUP p = subparsers.add_parser( "backup", help="Perform a backup", @@ -360,7 +368,6 @@ def main(): p.add_argument("tags", help="Tags to apply to the backup") p.set_defaults(func="backup") - # RESTORE p = subparsers.add_parser( "restore", help="Restore (a given revision) to a given target", @@ -387,14 +394,12 @@ def main(): ) p.set_defaults(func="restore") - # BACKUP p = subparsers.add_parser( "purge", help="Purge the backup store (i.e. chunked) from unused data", ) p.set_defaults(func="purge") - # FIND p = subparsers.add_parser( "find", help="Print full path or uuid of specified revisions", @@ -413,7 +418,6 @@ def main(): ) p.set_defaults(func="find") - # STATUS p = subparsers.add_parser( "status", help="Show backup status. 
Show inventory and summary information", @@ -428,28 +432,6 @@ def main(): ) p.set_defaults(func="status") - # upgrade - p = subparsers.add_parser( - "upgrade", - help="Upgrade this backup (incl. its data) to the newest " - "supported version", - ) - p.set_defaults(func="upgrade") - - # SCHEDULER DAEMON - p = subparsers.add_parser( - "scheduler", - help="Run the scheduler", - ) - p.set_defaults(func="scheduler") - p.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - # DISTRUST p = subparsers.add_parser( "distrust", @@ -464,7 +446,6 @@ def main(): ) p.set_defaults(func="distrust") - # VERIFY p = subparsers.add_parser( "verify", help="Verify specified revisions", @@ -478,7 +459,6 @@ def main(): ) p.set_defaults(func="verify") - # FORGET p = subparsers.add_parser( "forget", help="Forget specified revision", @@ -492,7 +472,6 @@ def main(): ) p.set_defaults(func="forget") - # TAGS p = subparsers.add_parser( "tags", help="Modify tags on revision", @@ -529,12 +508,12 @@ def main(): ) p.set_defaults(func="tags") - # EXPIRE p = subparsers.add_parser( "expire", help="Expire tags according to schedule", ) p.set_defaults(func="expire") + args = parser.parse_args() if not hasattr(args, "func"): parser.print_usage() @@ -544,7 +523,7 @@ def main(): logging.init_logging( args.verbose, args.logfile, - defaults={"job_name": default_job_name, "taskid": args.taskid}, + defaults={"taskid": args.taskid}, ) log = structlog.stdlib.get_logger(subsystem="command") log.debug("invoked", args=" ".join(sys.argv)) diff --git a/src/backy/cli/client.py b/src/backy/cli/client.py index 2ace349f..6a1a366b 100644 --- a/src/backy/cli/client.py +++ b/src/backy/cli/client.py @@ -1,19 +1,14 @@ -import datetime import re import sys -from asyncio import get_running_loop -from typing import TYPE_CHECKING, Dict, Iterator, List +from typing import TYPE_CHECKING, Dict, List -import aiohttp import humanize -from aiohttp import ClientResponseError, ClientTimeout, TCPConnector, hdrs +from aiohttp import ClientResponseError from aiohttp.web_exceptions import HTTPNotFound from rich import print as rprint from rich.table import Column, Table from structlog.stdlib import BoundLogger -import backy.backup -from backy.revision import Revision from backy.utils import format_datetime_local if TYPE_CHECKING: From c7c5db2a39026933a0820172c0097a5ba3ae82db Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Wed, 26 Jun 2024 00:59:59 +0200 Subject: [PATCH 08/25] snapshot: fix most rbd tests --- input-file | 1 + restore.img | 1 + src/backy/cli/__init__.py | 26 ++--- src/backy/cli/client.py | 3 +- .../{daemon => cli}/tests/test_client.py | 13 +-- src/backy/cli/tests/test_main.py | 5 +- src/backy/daemon/api.py | 10 +- src/backy/daemon/tests/test_api.py | 1 - src/backy/daemon/tests/test_daemon.py | 4 +- src/backy/daemon/tests/test_scheduler.py | 2 +- src/backy/rbd/__init__.py | 25 +++-- src/backy/rbd/backup.py | 2 +- src/backy/rbd/chunked/__init__.py | 2 - src/backy/rbd/chunked/tests/test_backend.py | 10 +- src/backy/rbd/{tests => }/conftest.py | 10 +- src/backy/rbd/sources/__init__.py | 9 +- src/backy/rbd/sources/ceph/source.py | 6 +- src/backy/rbd/sources/ceph/tests/conftest.py | 2 +- .../sources/ceph/tests/test_ceph_source.py | 98 ++++++++++--------- src/backy/rbd/sources/file.py | 6 +- .../{test_source.py => test_fc_source.py} | 3 +- src/backy/rbd/tests/__init__.py | 0 src/backy/rbd/tests/test_backup.py | 33 ++++--- src/backy/rbd/tests/test_backy-rbd.py | 11 ++- 
src/backy/rbd/tests/test_main.py | 81 +++------------ src/backy/rbd/tests/test_quarantine.py | 4 +- src/backy/tests/test_revision.py | 2 - 27 files changed, 172 insertions(+), 198 deletions(-) create mode 100644 input-file create mode 100644 restore.img rename src/backy/{daemon => cli}/tests/test_client.py (98%) rename src/backy/rbd/{tests => }/conftest.py (68%) rename src/backy/rbd/sources/flyingcircus/tests/{test_source.py => test_fc_source.py} (97%) create mode 100644 src/backy/rbd/tests/__init__.py diff --git a/input-file b/input-file new file mode 100644 index 00000000..1b67461d --- /dev/null +++ b/input-file @@ -0,0 +1 @@ +volume contents diff --git a/restore.img b/restore.img new file mode 100644 index 00000000..1b67461d --- /dev/null +++ b/restore.img @@ -0,0 +1 @@ +volume contents diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index ff85118a..de7eb16a 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -1,17 +1,17 @@ -# -*- encoding: utf-8 -*- - import argparse import asyncio import errno +import re import sys from pathlib import Path -from typing import Literal, Optional +from typing import Dict, Literal, Optional import humanize import structlog import tzlocal import yaml -from aiohttp import ClientConnectionError +from aiohttp import ClientResponseError +from aiohttp.web_exceptions import HTTPNotFound from rich import print as rprint from rich.table import Column, Table from structlog.stdlib import BoundLogger @@ -201,7 +201,7 @@ def jobs(self, filter_re=""): "Next Tags", ) - jobs = await self.api.fetch_status(filter_re) + jobs = self.api.fetch_status(filter_re) jobs.sort(key=lambda j: j["job"]) for job in jobs: overdue = ( @@ -228,7 +228,7 @@ def jobs(self, filter_re=""): next_time, job["next_tags"], ) - backups = await self.api.list_backups() + backups = self.api.list_backups() if filter_re: backups = list(filter(re.compile(filter_re).search, backups)) for b in backups: @@ -241,8 +241,8 @@ def status(self): """Show job status overview""" t = Table("Status", "#") state_summary: Dict[str, int] = {} - jobs = await self.api.get_jobs() - jobs += [{"status": "Dead"} for _ in await self.api.list_backups()] + jobs = self.api.get_jobs() + jobs += [{"status": "Dead"} for _ in self.api.list_backups()] for job in jobs: state_summary.setdefault(job["status"], 0) state_summary[job["status"]] += 1 @@ -254,7 +254,7 @@ def status(self): def run(self, job: str): """Trigger immediate run for one job""" try: - await self.api.run_job(job) + self.api.run_job(job) except ClientResponseError as e: if e.status == HTTPNotFound.status_code: self.log.error("unknown-job", job=job) @@ -264,18 +264,18 @@ def run(self, job: str): def runall(self): """Trigger immediate run for all jobs""" - jobs = await self.api.get_jobs() + jobs = self.api.get_jobs() for job in jobs: - await self.run(job["name"]) + self.run(job["name"]) def reload(self): """Reload the configuration.""" self.log.info("reloading-daemon") - await self.api.reload_daemon() + self.api.reload_daemon() self.log.info("reloaded-daemon") def check(self): - status = await self.api.fetch_status() + status = self.api.fetch_status() exitcode = 0 diff --git a/src/backy/cli/client.py b/src/backy/cli/client.py index 6a1a366b..698267a8 100644 --- a/src/backy/cli/client.py +++ b/src/backy/cli/client.py @@ -1,6 +1,6 @@ import re import sys -from typing import TYPE_CHECKING, Dict, List +from typing import TYPE_CHECKING, Dict, List, Optional, Pattern import humanize from aiohttp import ClientResponseError @@ -9,6 +9,7 @@ 
from rich.table import Column, Table from structlog.stdlib import BoundLogger +from backy.backup import StatusDict from backy.utils import format_datetime_local if TYPE_CHECKING: diff --git a/src/backy/daemon/tests/test_client.py b/src/backy/cli/tests/test_client.py similarity index 98% rename from src/backy/daemon/tests/test_client.py rename to src/backy/cli/tests/test_client.py index b96451b3..ce8efa4e 100644 --- a/src/backy/daemon/tests/test_client.py +++ b/src/backy/cli/tests/test_client.py @@ -6,14 +6,13 @@ from aiohttp.web_exceptions import HTTPUnauthorized from backy import utils -from backy.api import BackyAPI -from backy.client import APIClient, CLIClient +from backy.cli.client import CLIClient +from backy.daemon.api import BackyAPI, Client +from backy.daemon.tests.test_daemon import daemon +from backy.rbd.quarantine import QuarantineReport from backy.revision import Revision from backy.tests import Ellipsis -from ..quarantine import QuarantineReport -from .test_daemon import daemon - @pytest.fixture def log(log): @@ -62,9 +61,7 @@ async def api_client(api, aiohttp_client, log): headers={hdrs.AUTHORIZATION: "Bearer testtoken", "taskid": "ABCD"}, raise_for_status=True, ) - api_client = APIClient( - "", "http://localhost:0", "token", "task", log - ) + api_client = Client("", "http://localhost:0", "token", "task", log) await api_client.session.close() api_client.session = client return api_client diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index 46d20333..8270d30a 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -7,8 +7,6 @@ import pytest import backy.backup -import backy.client -import backy.main from backy import utils from backy.revision import Revision from backy.tests import Ellipsis @@ -551,8 +549,7 @@ def test_commands_wrapper_status_yaml( assert ( out == f"""\ -- backend_type: {backup.default_backend_type} - orig_tags: [] +- orig_tags: [] parent: '' server: '' stats: diff --git a/src/backy/daemon/api.py b/src/backy/daemon/api.py index efb0fa67..395d63e8 100644 --- a/src/backy/daemon/api.py +++ b/src/backy/daemon/api.py @@ -24,11 +24,11 @@ from backy.revision import Revision from backy.utils import generate_taskid -from .scheduler import Job - if TYPE_CHECKING: from backy.daemon import BackyDaemon + from .scheduler import Job + class BackyJSONEncoder(JSONEncoder): def default(self, o: Any) -> Any: @@ -150,8 +150,7 @@ async def get_status( ) -> aiohttp.web.StreamResponse: filter = request.query.get("filter", None) request["log"].info("get-status", filter=filter) - if filter: - filter_re = re.compile(filter) + filter_re = re.compile(filter) if filter else None return to_json(self.daemon.status(filter_re)) async def reload_daemon(self, request: web.Request): @@ -163,7 +162,7 @@ async def get_jobs(self, request: web.Request): request["log"].info("get-jobs") return to_json(list(self.daemon.jobs.values())) - async def get_job(self, request: web.Request) -> Job: + async def get_job(self, request: web.Request) -> "Job": name = request.match_info.get("job_name") if name is None: request["log"].info("empty-job") @@ -384,7 +383,6 @@ async def get_revs( json = await response.json() revs = [Revision.from_dict(r, backup, self.log) for r in json] for r in revs: - r.backend_type = "" r.orig_tags = r.tags r.server = self.server_name return revs diff --git a/src/backy/daemon/tests/test_api.py b/src/backy/daemon/tests/test_api.py index 537bfc36..fc7376b5 100644 --- a/src/backy/daemon/tests/test_api.py +++ 
b/src/backy/daemon/tests/test_api.py @@ -216,7 +216,6 @@ async def test_simple_sync(daemons, log): new_rev1 = b0.history[1] assert new_rev1.backup == b0 assert new_rev1.timestamp == rev1.timestamp - assert new_rev1.backend_type == "" assert new_rev1.stats == rev1.stats assert new_rev1.tags == rev1.tags assert new_rev1.orig_tags == new_rev1.tags diff --git a/src/backy/daemon/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py index d8829ff9..40e0cddc 100644 --- a/src/backy/daemon/tests/test_daemon.py +++ b/src/backy/daemon/tests/test_daemon.py @@ -11,10 +11,10 @@ import yaml from backy import utils -from backy.backends.chunked import ChunkedFileBackend from backy.daemon import BackyDaemon +from backy.daemon.scheduler import Job +from backy.rbd.chunked import ChunkedFileBackend from backy.revision import Revision -from backy.scheduler import Job from backy.tests import Ellipsis diff --git a/src/backy/daemon/tests/test_scheduler.py b/src/backy/daemon/tests/test_scheduler.py index 987ddef3..551a5221 100644 --- a/src/backy/daemon/tests/test_scheduler.py +++ b/src/backy/daemon/tests/test_scheduler.py @@ -4,7 +4,7 @@ import pytest import backy.utils -from backy.scheduler import Job +from backy.daemon.scheduler import Job @pytest.fixture diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index 2757fcd2..a95540f7 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -19,13 +19,22 @@ def main(): parser.add_argument( "-v", "--verbose", action="store_true", help="verbose output" ) + parser.add_argument( + "-b", + "--backupdir", + default=".", + type=Path, + help=( + "directory where backups and logs are written to " + "(default: %(default)s)" + ), + ) parser.add_argument( "-t", "--taskid", default=generate_taskid(), help="ID to include in log messages (default: 4 random base32 chars)", ) - parser.add_argument("-j", "--job", help="Job to work on.") subparsers = parser.add_subparsers() @@ -35,7 +44,7 @@ def main(): help="Perform a backup", ) p.set_defaults(func="backup") - parser.add_argument("-r", "--revision", help="Revision to work on.") + p.add_argument("revision", help="Revision to work on.") # RESTORE p = subparsers.add_parser( @@ -50,7 +59,7 @@ def main(): dest="restore_backend", help="(default: %(default)s)", ) - parser.add_argument("-r", "--revision", help="Revision to work on.") + p.add_argument("revision", help="Revision to work on.") p.add_argument( "target", metavar="TARGET", @@ -70,7 +79,7 @@ def main(): "verify", help="Verify specified revision", ) - parser.add_argument("-r", "--revision", help="Revision to work on.") + p.add_argument("revision", help="Revision to work on.") p.set_defaults(func="verify") args = parser.parse_args() @@ -79,23 +88,21 @@ def main(): parser.print_usage() sys.exit(0) - backupdir = Path("/srv/backy/" + args.job) # TODO - # Logging logging.init_logging( args.verbose, - backupdir / "backy.log", + args.backupdir / "backy.log", defaults={"taskid": args.taskid}, ) log = structlog.stdlib.get_logger(subsystem="command") log.debug("invoked", args=" ".join(sys.argv)) try: - b = RbdBackup(backupdir, log) + b = RbdBackup(args.backupdir, log) # XXX scheduler? 
b._clean() ret = 0 - match args.fun: + match args.func: case "backup": success = b.backup(args.revision) ret = int(not success) diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/backup.py index 851da92a..afe744d2 100644 --- a/src/backy/rbd/backup.py +++ b/src/backy/rbd/backup.py @@ -72,7 +72,7 @@ def __init__(self, path: Path, log: BoundLogger): type=self.config["source"]["type"], ) raise - self.source = source_factory(self.config["source"], self.log) + self.source = source_factory(self.config["source"], self, self.log) assert self.config["source"].get("backend", "chunked") == "chunked" diff --git a/src/backy/rbd/chunked/__init__.py b/src/backy/rbd/chunked/__init__.py index 401c33f7..09a86555 100644 --- a/src/backy/rbd/chunked/__init__.py +++ b/src/backy/rbd/chunked/__init__.py @@ -49,8 +49,6 @@ def purge(self) -> None: self.log.debug("purge") used_chunks: Set[Hash] = set() for revision in self.backup.local_history: - if revision.backend_type != "chunked": - continue used_chunks.update( type(self)(revision, self.log).open()._mapping.values() ) diff --git a/src/backy/rbd/chunked/tests/test_backend.py b/src/backy/rbd/chunked/tests/test_backend.py index bd6b3ceb..52c49cd1 100644 --- a/src/backy/rbd/chunked/tests/test_backend.py +++ b/src/backy/rbd/chunked/tests/test_backend.py @@ -39,9 +39,9 @@ def test_purge(rbdbackup, log): rbdbackup.scan() # Reassign as the scan will create a new reference r = rbdbackup.history[0] - assert len(list(r.backend.store.ls())) == 1 - r.backend.purge() - assert len(list(r.backend.store.ls())) == 1 + assert len(list(backend.store.ls())) == 1 + backend.purge() + assert len(list(backend.store.ls())) == 1 r.remove() - r.backend.purge() - assert len(list(r.backend.store.ls())) == 0 + backend.purge() + assert len(list(backend.store.ls())) == 0 diff --git a/src/backy/rbd/tests/conftest.py b/src/backy/rbd/conftest.py similarity index 68% rename from src/backy/rbd/tests/conftest.py rename to src/backy/rbd/conftest.py index 73edb153..895a5c8b 100644 --- a/src/backy/rbd/tests/conftest.py +++ b/src/backy/rbd/conftest.py @@ -4,6 +4,7 @@ import pytest from backy.rbd import RbdBackup +from backy.revision import Revision fixtures = os.path.dirname(__file__) + "/tests/samples" @@ -15,10 +16,17 @@ def rbdbackup(schedule, tmp_path, log): { "source": { "type": "file", - "filename": "test", + "filename": "input-file", }, "schedule": schedule.to_dict(), }, f, ) return RbdBackup(tmp_path, log) + + +def create_rev(rbdbackup, tags): + r = Revision.create(rbdbackup, tags, rbdbackup.log) + r.materialize() + rbdbackup.scan() + return r diff --git a/src/backy/rbd/sources/__init__.py b/src/backy/rbd/sources/__init__.py index 4bedb3c1..665d553d 100644 --- a/src/backy/rbd/sources/__init__.py +++ b/src/backy/rbd/sources/__init__.py @@ -1,10 +1,13 @@ from abc import ABC, abstractmethod -from typing import Type +from typing import TYPE_CHECKING, Type from structlog.stdlib import BoundLogger import backy.revision +if TYPE_CHECKING: + from backy.rbd import RbdBackup + class BackySource(ABC): @abstractmethod @@ -27,7 +30,9 @@ def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): class BackySourceFactory(ABC): @abstractmethod - def __init__(self, config: dict, log: BoundLogger) -> None: + def __init__( + self, config: dict, backup: "RbdBackup", log: BoundLogger + ) -> None: ... 
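
    # Editorial note, not part of the upstream patch: with the new signature
    # above, concrete factories receive the RbdBackup they belong to and are
    # expected to keep a reference to it. A hypothetical minimal sketch of
    # the new contract, mirroring what backy.rbd.sources.file.File does
    # later in this same patch:
    #
    #     class MySource(BackySourceFactory):
    #         def __init__(
    #             self, config: dict, backup: "RbdBackup", log: BoundLogger
    #         ) -> None:
    #             self.rbdbackup = backup  # used e.g. for quarantine reports
    #             self.log = log.bind(subsystem="mysource")
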
@abstractmethod diff --git a/src/backy/rbd/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py index 6c3c5846..a4db49f0 100644 --- a/src/backy/rbd/sources/ceph/source.py +++ b/src/backy/rbd/sources/ceph/source.py @@ -25,7 +25,7 @@ class CephRBD(BackySource, BackySourceFactory, BackySourceContext): log: BoundLogger rbd: RBDClient revision: Revision - backup: RbdBackup + rbdbackup: RbdBackup def __init__(self, config: dict, backup: RbdBackup, log: BoundLogger): self.pool = config["pool"] @@ -33,7 +33,7 @@ def __init__(self, config: dict, backup: RbdBackup, log: BoundLogger): self.always_full = config.get("full-always", False) self.log = log.bind(subsystem="ceph") self.rbd = RBDClient(self.log) - self.backup = backup + self.rbdbackup = backup def ready(self) -> bool: """Check whether the source can be backed up. @@ -140,7 +140,7 @@ def verify(self, target: ChunkedFileBackend) -> bool: return backy.utils.files_are_roughly_equal( source, target_, - report=lambda s, t, o: self.backup.quarantine.add_report( + report=lambda s, t, o: self.rbdbackup.quarantine.add_report( QuarantineReport(s, t, o) ), ) diff --git a/src/backy/rbd/sources/ceph/tests/conftest.py b/src/backy/rbd/sources/ceph/tests/conftest.py index 6c4adf29..953bbbf7 100644 --- a/src/backy/rbd/sources/ceph/tests/conftest.py +++ b/src/backy/rbd/sources/ceph/tests/conftest.py @@ -245,7 +245,7 @@ def unmap(self, device): @pytest.fixture(params=[CephJewelCLI, CephLuminousCLI, CephNautilusCLI]) def rbdclient(request, tmp_path, monkeypatch, log): monkeypatch.setattr( - backy.sources.ceph, "CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF", True + backy.rbd.sources.ceph, "CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF", True ) client = RBDClient(log) diff --git a/src/backy/rbd/sources/ceph/tests/test_ceph_source.py b/src/backy/rbd/sources/ceph/tests/test_ceph_source.py index 36f988fa..2f3687f8 100644 --- a/src/backy/rbd/sources/ceph/tests/test_ceph_source.py +++ b/src/backy/rbd/sources/ceph/tests/test_ceph_source.py @@ -28,10 +28,10 @@ def check_output(monkeypatch): @pytest.fixture -def ceph_rbd_imagesource(rbdclient, nosleep, log): +def ceph_rbd_imagesource(rbdclient, rbdbackup, nosleep, log): """Provides a CephRBD object configured for image pool/test, with rbd being mocked away and allowing snapshots on that image.""" - source = CephRBD(dict(pool="test", image="foo"), log) + source = CephRBD(dict(pool="test", image="foo"), rbdbackup, log) # rbdclient mock setup: rbdclient._ceph_cli._register_image_for_snaps("test/foo") source.rbd = rbdclient @@ -48,18 +48,18 @@ def test_select_ceph_source(): def test_assign_revision(nosleep, log): - source = CephRBD(dict(pool="test", image="foo"), log) + source = CephRBD(dict(pool="test", image="foo"), mock.Mock(), log) revision = mock.Mock() context_manager = source(revision) assert context_manager.revision is revision -def test_context_manager(backup, ceph_rbd_imagesource, log): +def test_context_manager(rbdbackup, ceph_rbd_imagesource, log): """The imagesource context manager around a backup revision must create a corresponding snapshot at enter, and clean up at exit.""" source = ceph_rbd_imagesource - revision = Revision.create(backup, set(), log, uuid="1") + revision = Revision.create(rbdbackup, set(), log, uuid="1") with source(revision): assert source.rbd.snap_ls("test/foo")[0]["name"] == "backy-1" @@ -67,7 +67,7 @@ def test_context_manager(backup, ceph_rbd_imagesource, log): def test_context_manager_cleans_out_snapshots( - ceph_rbd_imagesource, backup, log + ceph_rbd_imagesource, rbdbackup, log ): """The 
imagesource context manager cleans up unexpected backy snapshot revisions. Snapshots without the prefix 'backy-' are left untouched.""" @@ -78,10 +78,10 @@ def test_context_manager_cleans_out_snapshots( # unexpected revision snapshots are cleaned source.rbd.snap_create("test/foo@backy-2") - revision = Revision.create(backup, set(), log, uuid="1") + revision = Revision.create(rbdbackup, set(), log, uuid="1") with source(revision): revision.materialize() - backup.scan() + rbdbackup.scan() assert source.rbd.snap_ls("test/foo") == [ { @@ -101,7 +101,7 @@ def test_context_manager_cleans_out_snapshots( ] -def test_choose_full_without_parent(ceph_rbd_imagesource, backup, log): +def test_choose_full_without_parent(ceph_rbd_imagesource, rbdbackup, log): """When backing up a revision without a parent, a full backup needs to happen. The diff function must not be called.""" source = ceph_rbd_imagesource @@ -109,7 +109,7 @@ def test_choose_full_without_parent(ceph_rbd_imagesource, backup, log): source.diff = mock.Mock() source.full = mock.Mock() - revision = Revision.create(backup, set(), log) + revision = Revision.create(rbdbackup, set(), log) with source(revision) as s: s.backup(ChunkedFileBackend(revision, log)) @@ -118,7 +118,7 @@ def test_choose_full_without_parent(ceph_rbd_imagesource, backup, log): assert source.full.called -def test_choose_full_without_snapshot(ceph_rbd_imagesource, backup, log): +def test_choose_full_without_snapshot(ceph_rbd_imagesource, rbdbackup, log): """When backing up a revision with an immediate parent that has no corresponding snapshot, that parent must be ignored and a full backup has to be made. The diff function must not be called.""" @@ -127,21 +127,21 @@ def test_choose_full_without_snapshot(ceph_rbd_imagesource, backup, log): source.diff = mock.Mock() source.full = mock.Mock() - revision1 = Revision.create(backup, set(), log) + revision1 = Revision.create(rbdbackup, set(), log) revision1.materialize() - backup.scan() + rbdbackup.scan() - revision2 = Revision.create(backup, set(), log) + revision2 = Revision.create(rbdbackup, set(), log) - with source(revision2): - source.backup(ChunkedFileBackend(revision2, log)) + with source(revision2) as s: + s.backup(ChunkedFileBackend(revision2, log)) assert not source.diff.called assert source.full.called -def test_choose_diff_with_snapshot(ceph_rbd_imagesource, backup, log): +def test_choose_diff_with_snapshot(ceph_rbd_imagesource, rbdbackup, log): """In an environment where a parent revision exists and has a snapshot, both revisions shall be diffed.""" source = ceph_rbd_imagesource @@ -149,25 +149,25 @@ def test_choose_diff_with_snapshot(ceph_rbd_imagesource, backup, log): source.diff = mock.Mock() source.full = mock.Mock() - revision1 = Revision.create(backup, set(), log, uuid="a1") + revision1 = Revision.create(rbdbackup, set(), log, uuid="a1") revision1.materialize() # part of test setup: we check backy's behavior when a previous version not only # exists, but also has a snapshot source.rbd.snap_create("test/foo@backy-a1") - backup.scan() + rbdbackup.scan() - revision2 = Revision.create(backup, set(), log) + revision2 = Revision.create(rbdbackup, set(), log) - with source(revision2): - source.backup(ChunkedFileBackend(revision2, log)) + with source(revision2) as s: + s.backup(ChunkedFileBackend(revision2, log)) assert source.diff.called assert not source.full.called -def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): +def test_diff_backup(ceph_rbd_imagesource, rbdbackup, tmp_path, log): """When doing a 
diff backup between two revisions with snapshot, the RBDDiff needs to be called properly, a snapshot for the new revision needs to be created and the snapshot of the previous revision needs to be removed after the successfull backup.""" @@ -176,21 +176,21 @@ def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): source = ceph_rbd_imagesource parent = Revision.create( - backup, set(), log, uuid="ed968696-5ab0-4fe0-af1c-14cadab44661" + rbdbackup, set(), log, uuid="ed968696-5ab0-4fe0-af1c-14cadab44661" ) parent.materialize() # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. revision = Revision.create( - backup, set(), log, uuid="f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" + rbdbackup, set(), log, uuid="f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" ) revision.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) with ChunkedFileBackend(parent, log).open("wb") as f: f.write(b"asdf") - backup.scan() + rbdbackup.scan() revision.materialize() # test setup: ensure that previous revision has a snapshot. It needs to be removed @@ -199,7 +199,9 @@ def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): "test/foo@backy-ed968696-5ab0-4fe0-af1c-14cadab44661" ) - with mock.patch("backy.sources.ceph.rbd.RBDClient.export_diff") as export: + with mock.patch( + "backy.rbd.sources.ceph.rbd.RBDClient.export_diff" + ) as export: export.return_value = mock.MagicMock() export.return_value.__enter__.return_value = RBDDiffV1( io.BytesIO(SAMPLE_RBDDIFF) @@ -208,7 +210,7 @@ def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): source.diff( ChunkedFileBackend(revision, log), revision.get_parent() ) - backup.history.append(revision) + rbdbackup.history.append(revision) export.assert_called_with( "test/foo@backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802", "backy-ed968696-5ab0-4fe0-af1c-14cadab44661", @@ -221,16 +223,16 @@ def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): ) -def test_full_backup(ceph_rbd_imagesource, backup, tmp_path, log): +def test_full_backup(ceph_rbd_imagesource, rbdbackup, tmp_path, log): source = ceph_rbd_imagesource # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(backup, set(), log, uuid="a0") + revision = Revision.create(rbdbackup, set(), log, uuid="a0") revision.materialize() - backup.scan() + rbdbackup.scan() - with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: + with mock.patch("backy.rbd.sources.ceph.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han likes Leia.") backend = ChunkedFileBackend(revision, log) with source(revision): @@ -244,12 +246,12 @@ def test_full_backup(ceph_rbd_imagesource, backup, tmp_path, log): assert f.read() == b"Han likes Leia." # Now make another full backup. This overwrites the first. 
- revision2 = Revision.create(backup, set(), log, uuid="a1") + revision2 = Revision.create(rbdbackup, set(), log, uuid="a1") revision2.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) revision2.materialize() - backup.scan() + rbdbackup.scan() - with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: + with mock.patch("backy.rbd.sources.ceph.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han loves Leia.") backend = ChunkedFileBackend(revision2, log) with source(revision2): @@ -264,7 +266,7 @@ def test_full_backup(ceph_rbd_imagesource, backup, tmp_path, log): def test_full_backup_integrates_changes( - ceph_rbd_imagesource, backup, tmp_path, log + ceph_rbd_imagesource, rbdbackup, tmp_path, log ): # The backup source changes between two consecutive full backups. Both # backup images should reflect the state of the source at the time the @@ -274,18 +276,20 @@ def test_full_backup_integrates_changes( content0 = BLOCK * b"A" + BLOCK * b"B" + BLOCK * b"C" + BLOCK * b"D" content1 = BLOCK * b"A" + BLOCK * b"X" + BLOCK * b"\0" + BLOCK * b"D" - rev0 = Revision.create(backup, set(), log) + rev0 = Revision.create(rbdbackup, set(), log) rev0.materialize() - backup.scan() + rbdbackup.scan() - rev1 = Revision.create(backup, set(), log) + rev1 = Revision.create(rbdbackup, set(), log) rev1.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) rev1.materialize() # check fidelity for content, rev in [(content0, rev0), (content1, rev1)]: backend = ChunkedFileBackend(rev, log) - with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: + with mock.patch( + "backy.rbd.sources.ceph.rbd.RBDClient.export" + ) as export: export.return_value = io.BytesIO(content) with source(rev): source.full(backend) @@ -295,15 +299,15 @@ def test_full_backup_integrates_changes( assert content == f.read() -def test_verify_fail(backup, tmp_path, ceph_rbd_imagesource, log): +def test_verify_fail(rbdbackup, tmp_path, ceph_rbd_imagesource, log): source = ceph_rbd_imagesource # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(backup, set(), log) + revision = Revision.create(rbdbackup, set(), log) revision.materialize() - backup.scan() + rbdbackup.scan() rbd_source = str(tmp_path / "-dev-rbd0") with open(rbd_source, "w") as f: @@ -315,18 +319,18 @@ def test_verify_fail(backup, tmp_path, ceph_rbd_imagesource, log): # The backend has false data, so this needs to be detected. with source(revision): assert not source.verify(backend) - assert len(backup.quarantine.report_ids) == 1 + assert len(rbdbackup.quarantine.report_ids) == 1 -def test_verify(ceph_rbd_imagesource, backup, tmp_path, log): +def test_verify(ceph_rbd_imagesource, rbdbackup, tmp_path, log): source = ceph_rbd_imagesource # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. 
- revision = Revision.create(backup, set(), log, uuid="a0") + revision = Revision.create(rbdbackup, set(), log, uuid="a0") revision.materialize() - backup.scan() + rbdbackup.scan() rbd_source = source.rbd.map("test/foo@backy-a0")["device"] with open(rbd_source, "wb") as f: diff --git a/src/backy/rbd/sources/file.py b/src/backy/rbd/sources/file.py index 142f766a..ba306b0d 100644 --- a/src/backy/rbd/sources/file.py +++ b/src/backy/rbd/sources/file.py @@ -16,11 +16,11 @@ class File(BackySource, BackySourceFactory, BackySourceContext): filename: str cow: bool revision: Revision - backup: RbdBackup + rbdbackup: RbdBackup log: BoundLogger def __init__(self, config: dict, backup: RbdBackup, log: BoundLogger): - self.backup = backup + self.rbdbackup = backup self.filename = config["filename"] self.cow = config.get("cow", True) self.log = log.bind(filename=self.filename, subsystem="file") @@ -67,7 +67,7 @@ def verify(self, target: "backy.rbd.chunked.ChunkedFileBackend") -> bool: return files_are_equal( source, target_, - report=lambda s, t, o: self.backup.quarantine.add_report( + report=lambda s, t, o: self.rbdbackup.quarantine.add_report( QuarantineReport(s, t, o) ), ) diff --git a/src/backy/rbd/sources/flyingcircus/tests/test_source.py b/src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py similarity index 97% rename from src/backy/rbd/sources/flyingcircus/tests/test_source.py rename to src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py index 967030b5..3a5b40bd 100644 --- a/src/backy/rbd/sources/flyingcircus/tests/test_source.py +++ b/src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py @@ -10,7 +10,7 @@ @pytest.fixture -def fcrd(log): +def fcrd(log, rbdbackup): return FlyingCircusRootDisk( { "pool": "test", @@ -18,6 +18,7 @@ def fcrd(log): "vm": "test01", "consul_acl_token": "12345", }, + rbdbackup, log, ) diff --git a/src/backy/rbd/tests/__init__.py b/src/backy/rbd/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/rbd/tests/test_backup.py b/src/backy/rbd/tests/test_backup.py index 97f7e0a1..594ef1e6 100644 --- a/src/backy/rbd/tests/test_backup.py +++ b/src/backy/rbd/tests/test_backup.py @@ -2,6 +2,8 @@ import subprocess from unittest import mock +from backy.rbd.chunked import ChunkedFileBackend +from backy.rbd.conftest import create_rev from backy.rbd.sources.file import File from backy.utils import CHUNK_SIZE @@ -12,55 +14,60 @@ def test_config(rbdbackup, tmp_path): assert rbdbackup.source.filename == "input-file" -def test_restore_target(rbdbackup): +def test_restore_target(rbdbackup, log): source = "input-file" target = "restore.img" with open(source, "wb") as f: f.write(b"volume contents\n") - rbdbackup.backup({"daily"}) - rbdbackup.restore("0", target) + r = create_rev(rbdbackup, {"daily"}) + rbdbackup.backup(r.uuid) + rbdbackup.restore(r.uuid, target) with open(source, "rb") as s, open(target, "rb") as t: assert s.read() == t.read() -def test_restore_stdout(rbdbackup, capfd): +def test_restore_stdout(rbdbackup, capfd, log): source = "input-file" with open(source, "wb") as f: f.write(b"volume contents\n") - rbdbackup.backup({"daily"}) - rbdbackup.restore("0", "-") + r = create_rev(rbdbackup, {"daily"}) + rbdbackup.backup(r.uuid) + rbdbackup.restore(r.uuid, "-") assert not os.path.exists("-") out, err = capfd.readouterr() assert "volume contents\n" == out -def test_restore_backy_extract(rbdbackup, monkeypatch): +def test_restore_backy_extract(rbdbackup, monkeypatch, log): check_output = mock.Mock(return_value="backy-extract 1.1.0") 
monkeypatch.setattr(subprocess, "check_output", check_output) rbdbackup.restore_backy_extract = mock.Mock() source = "input-file" with open(source, "wb") as f: f.write(b"a" * CHUNK_SIZE) - rbdbackup.backup({"daily"}) - rbdbackup.restore("0", "restore.img") + r = create_rev(rbdbackup, {"daily"}) + rbdbackup.backup(r.uuid) + rbdbackup.restore(r.uuid, "restore.img") check_output.assert_called() rbdbackup.restore_backy_extract.assert_called_once_with( rbdbackup.find("0"), "restore.img" ) -def test_backup_corrupted(rbdbackup): +def test_backup_corrupted(rbdbackup, log): source = "input-file" with open(source, "wb") as f: f.write(b"volume contents\n") - rbdbackup.backup({"daily"}) + r = create_rev(rbdbackup, {"daily"}) + rbdbackup.backup(r.uuid) - store = rbdbackup.history[0].backend.store + store = ChunkedFileBackend(rbdbackup.history[0], log).store chunk_path = store.chunk_path(next(iter(store.seen))) os.chmod(chunk_path, 0o664) with open(chunk_path, "wb") as f: f.write(b"invalid") - rbdbackup.backup({"daily"}) + r2 = create_rev(rbdbackup, {"daily"}) + rbdbackup.backup(r2.uuid) assert rbdbackup.history == [] assert not os.path.exists(chunk_path) diff --git a/src/backy/rbd/tests/test_backy-rbd.py b/src/backy/rbd/tests/test_backy-rbd.py index 3f672943..b62ba9e8 100644 --- a/src/backy/rbd/tests/test_backy-rbd.py +++ b/src/backy/rbd/tests/test_backy-rbd.py @@ -5,6 +5,7 @@ from backy.ext_deps import BACKY_CMD, BASH from backy.rbd import RbdBackup +from backy.rbd.conftest import create_rev from backy.revision import Revision from backy.tests import Ellipsis @@ -41,7 +42,7 @@ def test_smoketest_internal(tmp_path, log): backup = RbdBackup(backup_dir, log) # Backup first state - rev1 = Revision.create(backup, {"manual:test"}, log) + rev1 = create_rev(backup, {"manual:test"}) backup.backup(rev1.uuid) # Restore first state from the newest revision @@ -55,7 +56,7 @@ def test_smoketest_internal(tmp_path, log): # Backup second state backup.source.filename = source2 - rev2 = Revision.create(backup, {"test"}, log) + rev2 = create_rev(backup, {"test"}) backup.backup(rev2.uuid) assert len(backup.history) == 2 @@ -71,7 +72,7 @@ def test_smoketest_internal(tmp_path, log): # Backup second state again backup.source.filename = source2 - rev3 = Revision.create(backup, {"manual:test"}, log) + rev3 = create_rev(backup, {"manual:test"}) backup.backup(rev3.uuid) assert len(backup.history) == 3 @@ -89,7 +90,7 @@ def test_smoketest_internal(tmp_path, log): # Backup third state backup.source.filename = source3 - rev4 = Revision.create(backup, {"test"}, log) + rev4 = create_rev(backup, {"test"}) backup.backup(rev4.uuid) assert len(backup.history) == 4 @@ -112,7 +113,7 @@ def test_smoketest_internal(tmp_path, log): @pytest.mark.slow def test_smoketest_external(): output = subprocess.check_output( - [BASH, os.path.dirname(__file__) + "/../../../smoketest.sh"], + [BASH, os.path.dirname(__file__) + "/smoketest.sh"], env=os.environ | {"BACKY_CMD": BACKY_CMD}, ) output = output.decode("utf-8") diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py index 075da650..9cb9bdb2 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -28,26 +28,7 @@ def test_display_usage(capsys, argv): out, err = capsys.readouterr() assert ( """\ -usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] - {client,backup,restore,purge,find,status,\ -upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} - ... 
-""" - == out - ) - assert err == "" - - -def test_display_client_usage(capsys, argv): - argv.append("client") - with pytest.raises(SystemExit) as exit: - main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - """\ -usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] - {jobs,status,run,runall,reload,check} ... +usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] {backup,restore,gc,verify} ... """ == out ) @@ -63,10 +44,7 @@ def test_display_help(capsys, argv): assert ( Ellipsis( """\ -usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] - {client,backup,restore,purge,find,status,\ -upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} - ... +usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] {backup,restore,gc,verify} ... Backup and restore for block devices. @@ -79,27 +57,6 @@ def test_display_help(capsys, argv): assert err == "" -def test_display_client_help(capsys, argv): - argv.extend(["client", "--help"]) - with pytest.raises(SystemExit) as exit: - main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] - {jobs,status,run,runall,reload,check} ... - -positional arguments: -... -""" - ) - == out - ) - assert err == "" - - def test_verbose_logging(capsys, argv): # This is just a smoke test to ensure the appropriate code path # for -v is covered. @@ -115,10 +72,6 @@ def print_args(*args, return_value=None, **kw): return return_value -async def async_print_args(*args, **kw): - print_args(*args, **kw) - - @pytest.mark.parametrize("success", [False, True]) def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): os.makedirs(tmp_path / "backy") @@ -147,7 +100,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): "backup", partialmethod(print_args, return_value=success), ) - argv.extend(["-v", "backup", "manual:test"]) + argv.extend(["-v", "backup", "asdf"]) utils.log_data = "" with pytest.raises(SystemExit) as exit: main() @@ -155,7 +108,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): assert ( Ellipsis( """\ -(, {'manual:test'}, False) +(, 'asdf') {} """ ) @@ -164,10 +117,9 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): assert ( Ellipsis( f"""\ -... D command/invoked args='... -v backup manual:test' -... D command/parsed func='backup' func_args={{'force': False, 'tags': 'manual:test'}} -... D quarantine/scan entries=0 -... D command/return-code code={int(not success)} +... D - command/invoked args='... -v backup asdf' +... D - quarantine/scan entries=0 +... D - command/return-code code={int(not success)} """ ) == utils.log_data @@ -177,7 +129,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): # TODO: test call restore, verify, gc def test_call_unexpected_exception( - capsys, backup, argv, monkeypatch, log, tmp_path + capsys, rbdbackup, argv, monkeypatch, log, tmp_path ): def do_raise(*args, **kw): raise RuntimeError("test") @@ -187,24 +139,23 @@ def do_raise(*args, **kw): monkeypatch.setattr(os, "_exit", lambda x: None) - argv.extend( - ["-l", str(tmp_path / "backy.log"), "-b", str(backup.path), "gc"] - ) + argv.extend(["-b", str(rbdbackup.path), "gc"]) utils.log_data = "" with pytest.raises(SystemExit): main() out, err = capsys.readouterr() + print(utils.log_data) assert "" == out assert ( Ellipsis( """\ -... D command/invoked args='... -l ... -b ... status' -... 
D command/parsed func='status' func_args={'yaml_': False, 'revision': 'all'} -... E command/failed exception_class='builtins.RuntimeError' exception_msg='test' +... D - command/invoked args='... -b ... gc' +... D - quarantine/scan entries=0 +... E - command/failed exception_class='builtins.RuntimeError' exception_msg='test' exception>\tTraceback (most recent call last): -exception>\t File ".../src/backy/main.py", line ..., in main -exception>\t ret = func(**func_args) -exception>\t File ".../src/backy/tests/test_main.py", line ..., in do_raise +exception>\t File ".../src/backy/rbd/__init__.py", line ..., in main +exception>\t b.gc() +exception>\t File ".../src/backy/rbd/tests/test_main.py", line ..., in do_raise exception>\t raise RuntimeError("test") exception>\tRuntimeError: test """ diff --git a/src/backy/rbd/tests/test_quarantine.py b/src/backy/rbd/tests/test_quarantine.py index 14af2ee2..c134b1cf 100644 --- a/src/backy/rbd/tests/test_quarantine.py +++ b/src/backy/rbd/tests/test_quarantine.py @@ -18,9 +18,9 @@ def test_quarantine(tmp_path, log, clock): timestamp: 2015-09-01 07:06:47+00:00 traceback: |- ... - File ".../src/backy/tests/test_quarantine.py", line ..., in test_quarantine + File ".../src/backy/rbd/tests/test_quarantine.py", line ..., in test_quarantine store.add_report(QuarantineReport(b"source", b"target", 3)) - File ".../src/backy/quarantine.py", line ..., in __init__ + File ".../src/backy/rbd/quarantine.py", line ..., in __init__ self.traceback = "".join(traceback.format_stack()).strip() """ ) diff --git a/src/backy/tests/test_revision.py b/src/backy/tests/test_revision.py index 498a6e7b..df9dae58 100644 --- a/src/backy/tests/test_revision.py +++ b/src/backy/tests/test_revision.py @@ -67,7 +67,6 @@ def test_store_revision_data(backup, clock, log): with open(r.info_filename, encoding="utf-8") as info: assert yaml.safe_load(info) == { "parent": "asdf", - "backend_type": backup.default_backend_type, "uuid": "asdf2", "stats": {"bytes_written": 0}, "tags": [], @@ -84,7 +83,6 @@ def test_store_revision_data_no_parent(backup, clock, log): with open(r.info_filename, encoding="utf-8") as info: assert yaml.safe_load(info) == { "parent": "", - "backend_type": backup.default_backend_type, "uuid": "asdf2", "stats": {"bytes_written": 0}, "tags": [], From 6c6c02704d4f514925f4b5a53152c3379ef4cf31 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Wed, 26 Jun 2024 11:15:11 +0200 Subject: [PATCH 09/25] snapshot: introduce repository --- pyproject.toml | 1 + src/backy/cli/__init__.py | 75 ++++++---- src/backy/cli/client.py | 2 +- src/backy/cli/tests/test_main.py | 6 +- src/backy/conftest.py | 1 - src/backy/daemon/__init__.py | 6 +- src/backy/daemon/api.py | 12 +- src/backy/daemon/scheduler.py | 10 +- src/backy/file/__init__.py | 136 +++++++++++++++++++ src/backy/rbd/__init__.py | 4 +- src/backy/rbd/backup.py | 18 +-- src/backy/rbd/conftest.py | 4 +- src/backy/rbd/sources/__init__.py | 2 +- src/backy/rbd/sources/ceph/source.py | 6 +- src/backy/rbd/sources/file.py | 6 +- src/backy/rbd/sources/flyingcircus/source.py | 4 +- src/backy/rbd/tests/test_backy-rbd.py | 4 +- src/backy/rbd/tests/test_main.py | 4 +- src/backy/rbd/tests/test_source.py | 4 +- src/backy/{backup.py => repository.py} | 31 ++++- src/backy/source.py | 17 +++ 21 files changed, 274 insertions(+), 79 deletions(-) create mode 100644 src/backy/file/__init__.py rename src/backy/{backup.py => repository.py} (95%) create mode 100644 src/backy/source.py diff --git a/pyproject.toml b/pyproject.toml index 3c38f17a..ff87633a 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ backy = "backy.cli:main" backyd = "backy.daemon:main" backy-rbd = "backy.rbd:main" backy-s3 = "backy.s3:main" +backy-file = "backy.file:main" [[tool.mypy.overrides]] module = "backy.*" diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index de7eb16a..012604a0 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -1,10 +1,10 @@ import argparse -import asyncio import errno +import os import re import sys from pathlib import Path -from typing import Dict, Literal, Optional +from typing import Any, Dict, Literal, Optional import humanize import structlog @@ -18,13 +18,16 @@ import backy.daemon from backy import logging -from backy.backup import Backup +from backy.repository import Repository + +import backy.source # XXX invert this dependency from backy.rbd.backup import RestoreBackend from backy.utils import format_datetime_local, generate_taskid + class Command(object): """Proxy between CLI calls and actual backup code.""" @@ -33,12 +36,24 @@ class Command(object): log: BoundLogger def __init__(self, path: Path, taskid, log: BoundLogger): - self.path = path + self.path = path.resolve() self.taskid = taskid self.log = log + def __call__(self, cmdname: str, args: dict[str, Any]): + func = getattr(self, cmdname) + ret = func(**args) + if not isinstance(ret, int): + ret = 0 + self.log.debug("return-code", code=ret) + return ret + + def init(self, type): + source = backy.source.KNOWN_SOURCES[type] + Repository.init(self.path, self.log, source=source) + def status(self, yaml_: bool, revision: str) -> None: - revs = Backup(self.path, self.log).find_revisions(revision) + revs = Repository(self.path, self.log).find_revisions(revision) if yaml_: print(yaml.safe_dump([r.to_dict() for r in revs])) return @@ -166,7 +181,7 @@ def tags( expect_ = None else: expect_ = set(t.strip() for t in expect.split(",")) - b = backy.backup.Backup(self.path, self.log) + b = backy.repository.Repository(self.path, self.log) success = b.tags( action, revision, @@ -180,7 +195,7 @@ def tags( def expire(self) -> None: # XXX needs to update from remote API peers first (pull) - b = backy.backup.Backup(self.path, self.log) + b = backy.repository.Repository(self.path, self.log) b.expire() b.warn_pending_changes() @@ -315,21 +330,30 @@ def main(): "-v", "--verbose", action="store_true", help="verbose output" ) parser.add_argument( - "-t", - "--taskid", - default=generate_taskid(), - help="id to include in log messages (default: 4 random base32 chars)", - ) - g.add_argument( "-c", "--config", type=Path, default="/etc/backy.conf", help="(default: %(default)s)", ) + parser.add_argument( + "-C", + default=".", + type=Path, + help=( + "Run as if backy was started in instead of the current " + "working directory." 
+ ), + ) + p.add_argument("type", + choices=list(backy.source.KNOWN_SOURCES), + help="Type of the source.") subparsers = parser.add_subparsers() + p = subparsers.add_parser("init", help="Create an empty backy repository.") + p.set_defaults(func="init") + p = subparsers.add_parser("jobs", help="List status of all known jobs") p.add_argument( "filter_re", @@ -519,34 +543,27 @@ def main(): parser.print_usage() sys.exit(0) + task_id = generate_taskid() + # Logging - logging.init_logging( - args.verbose, - args.logfile, - defaults={"taskid": args.taskid}, - ) + + logging.init_logging(args.verbose, defaults={"taskid": task_id}) log = structlog.stdlib.get_logger(subsystem="command") log.debug("invoked", args=" ".join(sys.argv)) - command = Command(args.backupdir, args.taskid, log) - func = getattr(command, args.func) + command = Command(args.C, task_id, log) + func = args.func # Pass over to function func_args = dict(args._get_kwargs()) del func_args["func"] del func_args["verbose"] - del func_args["backupdir"] - del func_args["logfile"] - del func_args["taskid"] + del func_args["config"] + del func_args["C"] try: log.debug("parsed", func=args.func, func_args=func_args) - ret = func(**func_args) - if isinstance(ret, int): - log.debug("return-code", code=ret) - sys.exit(ret) - log.debug("successful") - sys.exit(0) + sys.exit(command(func, func_args)) except Exception: log.exception("failed") sys.exit(1) diff --git a/src/backy/cli/client.py b/src/backy/cli/client.py index 698267a8..ca3ecbe6 100644 --- a/src/backy/cli/client.py +++ b/src/backy/cli/client.py @@ -9,7 +9,7 @@ from rich.table import Column, Table from structlog.stdlib import BoundLogger -from backy.backup import StatusDict +from backy.repository import StatusDict from backy.utils import format_datetime_local if TYPE_CHECKING: diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index 8270d30a..28fa6dbb 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -6,7 +6,7 @@ import pytest -import backy.backup +import backy.repository from backy import utils from backy.revision import Revision from backy.tests import Ellipsis @@ -172,7 +172,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): ) monkeypatch.setattr( - backy.backup.Backup, + backy.repository.Repository, "backup", partialmethod(print_args, return_value=success), ) @@ -184,7 +184,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): assert ( Ellipsis( """\ -(, {'manual:test'}, False) +(, {'manual:test'}, False) {} """ ) diff --git a/src/backy/conftest.py b/src/backy/conftest.py index 916916c9..f94a30af 100644 --- a/src/backy/conftest.py +++ b/src/backy/conftest.py @@ -7,7 +7,6 @@ import pytest import structlog -import backy.backup import backy.logging import backy.schedule from backy import utils diff --git a/src/backy/daemon/__init__.py b/src/backy/daemon/__init__.py index e315b58a..5b4c1dc9 100644 --- a/src/backy/daemon/__init__.py +++ b/src/backy/daemon/__init__.py @@ -18,7 +18,7 @@ from structlog.stdlib import BoundLogger from backy import logging -from backy.backup import Backup, StatusDict +from backy.repository import Repository, StatusDict from backy.revision import filter_manual_tags from backy.schedule import Schedule from backy.utils import has_recent_changes, is_dir_no_symlink @@ -43,7 +43,7 @@ class BackyDaemon(object): config: dict schedules: dict[str, Schedule] jobs: dict[str, Job] - dead_backups: dict[str, Backup] + dead_backups: dict[str, Repository] 
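As an aside on the dispatch style used by `Command.__call__` in this patch: subcommand names resolve to methods via `getattr`, and non-integer return values collapse to exit code 0. A condensed, hypothetical illustration (the `Demo` class is a stand-in, not code from the patch):

    from typing import Any

    class Demo:
        def __call__(self, cmdname: str, args: dict[str, Any]) -> int:
            func = getattr(self, cmdname)  # resolve the subcommand to a method
            ret = func(**args)
            return ret if isinstance(ret, int) else 0

        def status(self, revision: str) -> None:
            print(f"showing status for {revision}")

    print(Demo()("status", {"revision": "all"}))  # prints the status, then 0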
backup_semaphores: dict[str, asyncio.BoundedSemaphore] log: BoundLogger @@ -142,7 +142,7 @@ def _apply_config(self): if b.name in self.jobs or not b.is_dir(follow_symlinks=False): continue try: - self.dead_backups[b.name] = Backup( + self.dead_backups[b.name] = Repository( self.base_dir / b.name, self.log.bind(job_name=b.name), ) diff --git a/src/backy/daemon/api.py b/src/backy/daemon/api.py index 395d63e8..1e0eabc4 100644 --- a/src/backy/daemon/api.py +++ b/src/backy/daemon/api.py @@ -19,8 +19,8 @@ from aiohttp.web_runner import AppRunner, TCPSite from structlog.stdlib import BoundLogger -import backy.backup -from backy.backup import Backup, StatusDict +import backy.repository +from backy.repository import Repository, StatusDict from backy.revision import Revision from backy.utils import generate_taskid @@ -187,7 +187,7 @@ async def list_backups(self, request: web.Request): async def get_backup( self, request: web.Request, allow_active: bool - ) -> Backup: + ) -> Repository: name = request.match_info.get("backup_name") request["log"].info("get-backups", name=name) if name in self.daemon.dead_backups: @@ -374,14 +374,14 @@ async def touch_backup(self, name: str): return async def get_revs( - self, backup: "backy.backup.Backup", only_clean: bool = True + self, repository: "backy.repository.Repository", only_clean: bool = True ) -> List[Revision]: async with self.session.get( - f"/v1/backups/{backup.name}/revs", + f"/v1/backups/{repository.name}/revs", params={"only_clean": int(only_clean)}, ) as response: json = await response.json() - revs = [Revision.from_dict(r, backup, self.log) for r in json] + revs = [Revision.from_dict(r, repository, self.log) for r in json] for r in revs: r.orig_tags = r.tags r.server = self.server_name diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index a7157137..5b5f23ac 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -16,7 +16,7 @@ from structlog.stdlib import BoundLogger import backy.utils -from backy.backup import Backup +from backy.repository import Repository from backy.ext_deps import BACKY_CMD from backy.revision import Revision from backy.schedule import Schedule @@ -41,7 +41,7 @@ class Job(object): next_time: Optional[datetime.datetime] = None next_tags: Optional[set[str]] = None path: Path - backup: Backup + backup: Repository logfile: Path last_config: Optional[dict] = None daemon: "BackyDaemon" @@ -154,7 +154,7 @@ async def _wait_for_leader(self, next_time: datetime.datetime) -> bool: ) leader = None leader_revs = len(self.backup.get_history(clean=True, local=True)) - leader_status: "backy.backup.StatusDict" + leader_status: "backy.repository.StatusDict" self.log.info("local-revs", local_revs=leader_revs) for server, status in zip(api, statuses): log = self.log.bind(server=server) @@ -492,7 +492,7 @@ def stop(self) -> None: self._task = None self.update_status("") - @Backup.locked(target=".backup", mode="exclusive") + @Repository.locked(target=".backup", mode="exclusive") async def push_metadata(self, peers, taskid: str) -> int: grouped = defaultdict(list) for r in self.backup.clean_history: @@ -558,7 +558,7 @@ async def _push_metadata(self, api: Client, revs: List[Revision]) -> bool: error = True return error - @Backup.locked(target=".backup", mode="exclusive") + @Repository.locked(target=".backup", mode="exclusive") async def pull_metadata(self, peers: dict, taskid: str) -> int: async def remove_dead_peer(): for r in list(self.backup.history): diff --git a/src/backy/file/__init__.py 
b/src/backy/file/__init__.py
new file mode 100644
index 00000000..4e2d3d96
--- /dev/null
+++ b/src/backy/file/__init__.py
@@ -0,0 +1,136 @@
+import argparse
+import errno
+import os
+import sys
+from pathlib import Path
+from structlog.stdlib import BoundLogger
+
+import structlog
+
+from backy.utils import generate_taskid
+
+from .. import logging
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from backy.repository import Repository
+
+class FileSource:
+    type_ = "file"
+
+    @classmethod
+    def init(cls, repository: Repository, log: BoundLogger):
+        return {'type': self.type_}
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Backup and restore for block devices.",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="verbose output"
+    )
+    parser.add_argument(
+        "-c",
+        "--config",
+        type=Path,
+        default="/etc/backy.conf",
+        help="(default: %(default)s)",
+    )
+    parser.add_argument(
+        "-C",
+        default=".",
+        type=Path,
+        help=(
+            "Run as if backy was started in <path> instead of the current "
+            "working directory."
+        ),
+    )
+    parser.add_argument(
+        "-t",
+        "--taskid",
+        default=generate_taskid(),
+        help="ID to include in log messages (default: 4 random base32 chars)",
+    )
+
+    subparsers = parser.add_subparsers()
+
+    # BACKUP
+    p = subparsers.add_parser(
+        "backup",
+        help="Perform a backup",
+    )
+    p.set_defaults(func="backup")
+    p.add_argument("revision", help="Revision to create.")
+
+    # RESTORE
+    p = subparsers.add_parser(
+        "restore",
+        help="Restore (a given revision) to a given target",
+    )
+    p.add_argument("revision", help="Revision to restore.")
+    p.add_argument(
+        "target",
+        metavar="TARGET",
+        help='Copy backed up revision to TARGET. Use stdout if TARGET is "-"',
+    )
+    p.set_defaults(func="restore")
+
+    # GC
+    p = subparsers.add_parser(
+        "gc",
+        help="Purge the backup store from unused data",
+    )
+    p.set_defaults(func="gc")
+
+    # VERIFY
+    p = subparsers.add_parser(
+        "verify",
+        help="Verify specified revision",
+    )
+    p.add_argument("revision", help="Revision to work on.")
+    p.set_defaults(func="verify")
+
+    args = parser.parse_args()
+
+    os.chdir(args.C)
+
+    if not hasattr(args, "func"):
+        parser.print_usage()
+        sys.exit(0)
+
+    # Logging
+    logging.init_logging(
+        args.verbose,
+        args.backupdir / "backy.log",
+        defaults={"taskid": args.taskid},
+    )
+    log = structlog.stdlib.get_logger(subsystem="command")
+    log.debug("invoked", args=" ".join(sys.argv))
+
+    try:
+        b = RbdBackup(args.backupdir, log)
+        # XXX scheduler?
+        b._clean()
+        ret = 0
+        match args.func:
+            case "backup":
+                success = b.backup(args.revision)
+                ret = int(not success)
+            case "restore":
+                b.restore(args.revision, args.target)
+            case "gc":
+                b.gc()
+            case "verify":
+                b.verify(args.revision)
+            case _:
+                raise ValueError("invalid function: " + args.func)
+        log.debug("return-code", code=ret)
+        sys.exit(ret)
+    except Exception as e:
+        if isinstance(e, IOError) and e.errno in [errno.EDEADLK, errno.EAGAIN]:
+            log.warning("backup-currently-locked")
+        else:
+            log.exception("failed")
+        sys.exit(1)
diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py
index a95540f7..7f3fc477 100644
--- a/src/backy/rbd/__init__.py
+++ b/src/backy/rbd/__init__.py
@@ -8,7 +8,7 @@
 from backy.utils import generate_taskid
 
 from .. 
import logging -from .backup import RbdBackup, RestoreBackend +from .backup import RbdSource, RestoreBackend def main(): @@ -98,7 +98,7 @@ def main(): log.debug("invoked", args=" ".join(sys.argv)) try: - b = RbdBackup(args.backupdir, log) + b = RbdSource(args.backupdir, log) # XXX scheduler? b._clean() ret = 0 diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/backup.py index afe744d2..61aa9cfe 100644 --- a/src/backy/rbd/backup.py +++ b/src/backy/rbd/backup.py @@ -1,4 +1,3 @@ -import fcntl import os import subprocess import time @@ -10,9 +9,10 @@ import backy -from ..backup import Backup from ..ext_deps import BACKY_EXTRACT +from ..repository import Repository from ..revision import Revision, Trust +from ..source import Source from ..utils import CHUNK_SIZE, copy, posix_fadvise from .chunked import ChunkedFileBackend from .chunked.chunk import BackendException @@ -42,7 +42,7 @@ def __str__(self): return self.value -class RbdBackup(Backup): +class RbdSource(Source): """A backup of a VM. Provides access to methods to @@ -85,8 +85,8 @@ def problem_reports(self): ################# # Making backups - @Backup.locked(target=".backup", mode="exclusive") - @Backup.locked(target=".purge", mode="shared") + @Repository.locked(target=".backup", mode="exclusive") + @Repository.locked(target=".purge", mode="shared") def backup(self, revision: str) -> bool: new_revision = self.find_by_uuid(revision) self.prevent_remote_rev([new_revision]) @@ -139,13 +139,13 @@ def backup(self, revision: str) -> bool: break return verified - @Backup.locked(target=".purge", mode="shared") + @Repository.locked(target=".purge", mode="shared") def verify(self, revision: str) -> None: rev = self.find_by_uuid(revision) self.prevent_remote_rev([rev]) ChunkedFileBackend(rev, self.log).verify() - @Backup.locked(target=".purge", mode="exclusive") + @Repository.locked(target=".purge", mode="exclusive") def gc(self) -> None: ChunkedFileBackend(self.local_history[-1], self.log).purge() self.clear_purge_pending() @@ -214,7 +214,7 @@ def restore_backy_extract(self, rev: Revision, target: str) -> None: f"backy-extract failed with return code {return_code}. Maybe try `--backend python`?" 
) - @Backup.locked(target=".purge", mode="shared") + @Repository.locked(target=".purge", mode="shared") def restore_file(self, source: IO, target_name: str) -> None: """Bulk-copy from open revision `source` to target file.""" self.log.debug("restore-file", source=source.name, target=target_name) @@ -226,7 +226,7 @@ def restore_file(self, source: IO, target_name: str) -> None: pass copy(source, target) - @Backup.locked(target=".purge", mode="shared") + @Repository.locked(target=".purge", mode="shared") def restore_stdout(self, source: IO) -> None: """Emit restore data to stdout (for pipe processing).""" self.log.debug("restore-stdout", source=source.name) diff --git a/src/backy/rbd/conftest.py b/src/backy/rbd/conftest.py index 895a5c8b..e0b021d2 100644 --- a/src/backy/rbd/conftest.py +++ b/src/backy/rbd/conftest.py @@ -3,7 +3,7 @@ import pytest -from backy.rbd import RbdBackup +from backy.rbd import RbdSource from backy.revision import Revision fixtures = os.path.dirname(__file__) + "/tests/samples" @@ -22,7 +22,7 @@ def rbdbackup(schedule, tmp_path, log): }, f, ) - return RbdBackup(tmp_path, log) + return RbdSource(tmp_path, log) def create_rev(rbdbackup, tags): diff --git a/src/backy/rbd/sources/__init__.py b/src/backy/rbd/sources/__init__.py index 665d553d..9665fe62 100644 --- a/src/backy/rbd/sources/__init__.py +++ b/src/backy/rbd/sources/__init__.py @@ -6,7 +6,7 @@ import backy.revision if TYPE_CHECKING: - from backy.rbd import RbdBackup + from backy.rbd import RbdSource class BackySource(ABC): diff --git a/src/backy/rbd/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py index a4db49f0..2835f038 100644 --- a/src/backy/rbd/sources/ceph/source.py +++ b/src/backy/rbd/sources/ceph/source.py @@ -5,7 +5,7 @@ import backy.utils from backy.revision import Revision -from ... import RbdBackup +from ... import RbdSource from ...chunked import ChunkedFileBackend from ...quarantine import QuarantineReport from .. 
import BackySource, BackySourceContext, BackySourceFactory @@ -25,9 +25,9 @@ class CephRBD(BackySource, BackySourceFactory, BackySourceContext): log: BoundLogger rbd: RBDClient revision: Revision - rbdbackup: RbdBackup + rbdbackup: RbdSource - def __init__(self, config: dict, backup: RbdBackup, log: BoundLogger): + def __init__(self, config: dict, backup: RbdSource, log: BoundLogger): self.pool = config["pool"] self.image = config["image"] self.always_full = config.get("full-always", False) diff --git a/src/backy/rbd/sources/file.py b/src/backy/rbd/sources/file.py index ba306b0d..9cd0f211 100644 --- a/src/backy/rbd/sources/file.py +++ b/src/backy/rbd/sources/file.py @@ -1,7 +1,7 @@ from structlog.stdlib import BoundLogger import backy.rbd.chunked -from backy.rbd import RbdBackup +from backy.rbd import RbdSource from backy.rbd.quarantine import QuarantineReport from backy.rbd.sources import ( BackySource, @@ -16,10 +16,10 @@ class File(BackySource, BackySourceFactory, BackySourceContext): filename: str cow: bool revision: Revision - rbdbackup: RbdBackup + rbdbackup: RbdSource log: BoundLogger - def __init__(self, config: dict, backup: RbdBackup, log: BoundLogger): + def __init__(self, config: dict, backup: RbdSource, log: BoundLogger): self.rbdbackup = backup self.filename = config["filename"] self.cow = config.get("cow", True) diff --git a/src/backy/rbd/sources/flyingcircus/source.py b/src/backy/rbd/sources/flyingcircus/source.py index fa8a0ce2..f9063026 100644 --- a/src/backy/rbd/sources/flyingcircus/source.py +++ b/src/backy/rbd/sources/flyingcircus/source.py @@ -7,14 +7,14 @@ from backy.utils import TimeOut, TimeOutError -from ... import RbdBackup +from ... import RbdSource from ..ceph.source import CephRBD class FlyingCircusRootDisk(CephRBD): snapshot_timeout = 90 - def __init__(self, config, backup: RbdBackup, log: BoundLogger): + def __init__(self, config, backup: RbdSource, log: BoundLogger): self.config = config self.vm = config["vm"] self.consul_acl_token = config.get("consul_acl_token") diff --git a/src/backy/rbd/tests/test_backy-rbd.py b/src/backy/rbd/tests/test_backy-rbd.py index b62ba9e8..d9d9418f 100644 --- a/src/backy/rbd/tests/test_backy-rbd.py +++ b/src/backy/rbd/tests/test_backy-rbd.py @@ -4,7 +4,7 @@ import pytest from backy.ext_deps import BACKY_CMD, BASH -from backy.rbd import RbdBackup +from backy.rbd import RbdSource from backy.rbd.conftest import create_rev from backy.revision import Revision from backy.tests import Ellipsis @@ -39,7 +39,7 @@ def test_smoketest_internal(tmp_path, log): % source1 ).encode("utf-8") ) - backup = RbdBackup(backup_dir, log) + backup = RbdSource(backup_dir, log) # Backup first state rev1 = create_rev(backup, {"manual:test"}) diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py index 9cb9bdb2..31cb60e9 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -96,7 +96,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): ) monkeypatch.setattr( - backy.rbd.RbdBackup, + backy.rbd.RbdSource, "backup", partialmethod(print_args, return_value=success), ) @@ -134,7 +134,7 @@ def test_call_unexpected_exception( def do_raise(*args, **kw): raise RuntimeError("test") - monkeypatch.setattr(backy.rbd.RbdBackup, "gc", do_raise) + monkeypatch.setattr(backy.rbd.RbdSource, "gc", do_raise) import os monkeypatch.setattr(os, "_exit", lambda x: None) diff --git a/src/backy/rbd/tests/test_source.py b/src/backy/rbd/tests/test_source.py index b4429579..ea638a23 100644 --- 
a/src/backy/rbd/tests/test_source.py
+++ b/src/backy/rbd/tests/test_source.py
@@ -1,4 +1,4 @@
-from backy.rbd import RbdBackup
+from backy.rbd import RbdSource
 from backy.rbd.sources.ceph.source import CephRBD
 
 
@@ -17,7 +17,7 @@ def test_configure_ceph_source(tmp_path, log):
           image: test04
     """
     )
-    backup = RbdBackup(tmp_path, log)
+    backup = RbdSource(tmp_path, log)
     assert isinstance(backup.source, CephRBD)
     assert backup.source.pool == "test"
    assert backup.source.image == "test04"
diff --git a/src/backy/backup.py b/src/backy/repository.py
similarity index 95%
rename from src/backy/backup.py
rename to src/backy/repository.py
index 178a76c5..4c4d9d6e 100644
--- a/src/backy/backup.py
+++ b/src/backy/repository.py
@@ -20,6 +20,12 @@
 from .revision import Revision, Trust, filter_schedule_tags
 from .schedule import Schedule
 
+from backy.source import Source
+
+
+
+class RepositoryNotEmpty(RuntimeError):
+    pass
 
 
 class StatusDict(TypedDict):
@@ -38,8 +44,8 @@ class StatusDict(TypedDict):
     local_revs: int
 
 
-class Backup(object):
-    """A generic backup of some data source."""
+class Repository(object):
+    """A repository of backup revisions of some object."""
 
     path: Path
     config: dict
@@ -60,7 +66,7 @@ def __init__(self, path: Path, log: BoundLogger):
         except IOError:
             self.log.error(
                 "could-not-read-config",
-                _fmt_msg="Could not read config file. Is --backupdir correct?",
+                _fmt_msg="Could not read config file. Is the path correct?",
                 config_path=str(self.path / "config"),
             )
             raise
@@ -68,6 +74,24 @@ def __init__(self, path: Path, log: BoundLogger):
         self.schedule = Schedule()
         self.schedule.configure(self.config["schedule"])
 
+    @classmethod
+    def init(cls, path: Path, log: BoundLogger, source: Source):
+        if (path / 'config').exists():
+            raise RepositoryNotEmpty(path)
+
+        if not path.exists():
+            path.mkdir(parents=True, exist_ok=True)
+
+        source_config = source.init(path, log)
+
+        config = {'schedule': {}, 'source': source_config}
+
+        with open(path / 'config', 'w') as f:
+            yaml.dump(config, f)
+
+        log.info(f"Initialized empty repository in {path}")
+
+
     @property
     def problem_reports(self) -> list[str]:
         return []
@@ -122,6 +146,7 @@ def locked_function(self, *args, skip_lock=False, **kw):
     def name(self) -> str:
         return self.path.name
 
+
     def to_dict(self):
         return self.config
 
diff --git a/src/backy/source.py b/src/backy/source.py
new file mode 100644
index 00000000..2cd932d2
--- /dev/null
+++ b/src/backy/source.py
@@ -0,0 +1,17 @@
+from typing import Any
+from pathlib import Path
+from structlog.stdlib import BoundLogger
+from backy.file import FileSource
+
+# XXX Use plugin discovery here
+KNOWN_SOURCES: dict[str, type["Source"]] = {s.type_: s for s in [FileSource]}
+
+
+class Source:
+
+    type_: str
+    config: dict[str, Any]
+
+    @classmethod
+    def init(cls, repository: Path, log: BoundLogger) -> dict[str, Any]:
+        return {'type': cls.type_}
From 4e76ba86b980ac3871d0f83ec39bd9f5c07d34e3 Mon Sep 17 00:00:00 2001
From: Johann Bahl
Date: Wed, 26 Jun 2024 14:57:43 +0200
Subject: [PATCH 10/25] snapshot: more renaming and test fixes

---
 lib.nix                                       |   3 +-
 setup.py                                      |   6 +-
 src/backy/cli/__init__.py                     |  30 ++--
 src/backy/cli/client.py                       |  10 +-
 src/backy/cli/tests/test_client.py            |   6 +-
 src/backy/conftest.py                         |   9 ++
 src/backy/daemon/__init__.py                  |  18 +--
 src/backy/daemon/api.py                       |  10 +-
 src/backy/daemon/scheduler.py                 | 129 +++++++-----------
 src/backy/daemon/tests/test_api.py            |  24 ++--
 src/backy/{ => daemon}/tests/test_callback.sh |   0
 src/backy/daemon/tests/test_daemon.py         |  38 +++---
 src/backy/ext_deps.py                         |   8 +-
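Putting the pieces of the previous commit together: `Command.init` looks a source type up in `backy.source.KNOWN_SOURCES` and hands the class to `Repository.init`, which delegates the source-specific part and writes the combined config. A sketch of that intended flow once the fixes later in the series land; the path and logger setup here are illustrative only:

    from pathlib import Path

    import structlog

    import backy.source
    from backy.repository import Repository

    log = structlog.stdlib.get_logger()
    source = backy.source.KNOWN_SOURCES["file"]  # resolve the source class
    Repository.init(Path("/srv/backy/demo"), log, source=source)
    # writes <path>/config, e.g. {'schedule': {}, 'source': {'type': 'file'}}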
src/backy/file/__init__.py | 12 +- src/backy/logging.py | 13 +- src/backy/rbd/__init__.py | 6 +- src/backy/rbd/backup.py | 12 +- src/backy/rbd/chunked/__init__.py | 17 ++- src/backy/rbd/chunked/chunk.py | 9 +- src/backy/rbd/chunked/file.py | 13 +- src/backy/rbd/chunked/tests/test_backend.py | 14 +- src/backy/rbd/conftest.py | 12 +- src/backy/rbd/sources/__init__.py | 9 +- src/backy/rbd/sources/ceph/source.py | 18 ++- .../sources/ceph/tests/test_ceph_source.py | 76 +++++------ src/backy/rbd/sources/file.py | 12 +- src/backy/rbd/sources/flyingcircus/source.py | 6 +- .../flyingcircus/tests/test_fc_source.py | 4 +- src/backy/rbd/tests/smoketest.sh | 6 +- src/backy/rbd/tests/test_backup.py | 52 +++---- src/backy/rbd/tests/test_backy-rbd.py | 60 ++++---- src/backy/rbd/tests/test_main.py | 10 +- src/backy/rbd/tests/test_source.py | 4 +- src/backy/repository.py | 23 ++-- src/backy/revision.py | 24 ++-- src/backy/schedule.py | 39 +++--- src/backy/source.py | 8 +- src/backy/tests/conftest.py | 14 +- src/backy/tests/samples/sample1.rev | 5 + src/backy/tests/samples/sample2.rev | 4 + src/backy/tests/test_backup.py | 80 +++++------ src/backy/tests/test_revision.py | 60 ++++---- src/backy/tests/test_schedule.py | 88 ++++++------ src/backy/tests/test_utils.py | 8 +- src/backy/utils.py | 6 +- 45 files changed, 519 insertions(+), 496 deletions(-) rename src/backy/{ => daemon}/tests/test_callback.sh (100%) create mode 100644 src/backy/tests/samples/sample1.rev create mode 100644 src/backy/tests/samples/sample2.rev diff --git a/lib.nix b/lib.nix index bb27f872..c963c2fe 100644 --- a/lib.nix +++ b/lib.nix @@ -127,7 +127,8 @@ in devShells = { default = mkShellNoCC { - BACKY_CMD = "${poetryEnv}/bin/backy"; + BACKY_CLI_CMD = "${poetryEnv}/bin/backy"; + BACKY_RBD_CMD = "${poetryEnv}/bin/backy-rbd"; packages = [ poetryEnv poetry diff --git a/setup.py b/setup.py index 6a30f027..14ffe3ba 100644 --- a/setup.py +++ b/setup.py @@ -93,7 +93,11 @@ def long_desc(): Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Topic :: System :: Archiving :: Backup -"""[:-1].split("\n"), +"""[ + :-1 + ].split( + "\n" + ), description=__doc__.strip(), long_description=long_desc(), packages=find_packages("src"), diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index 012604a0..1ab74ee5 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -17,17 +17,15 @@ from structlog.stdlib import BoundLogger import backy.daemon -from backy import logging -from backy.repository import Repository - import backy.source +from backy import logging # XXX invert this dependency -from backy.rbd.backup import RestoreBackend +from backy.rbd.backup import RbdRepository, RestoreBackend +from backy.repository import Repository from backy.utils import format_datetime_local, generate_taskid - class Command(object): """Proxy between CLI calls and actual backup code.""" @@ -115,7 +113,7 @@ def status(self, yaml_: bool, revision: str) -> None: ) def backup(self, tags: str, force: bool) -> int: - b = Backup(self.path, self.log) + b = RbdRepository(self.path, self.log) b._clean() try: tags_ = set(t.strip() for t in tags.split(",")) @@ -132,11 +130,11 @@ def backup(self, tags: str, force: bool) -> int: def restore( self, revision: str, target: str, restore_backend: RestoreBackend ) -> None: - b = Backup(self.path, self.log) + b = RbdRepository(self.path, self.log) b.restore(revision, target, restore_backend) def find(self, revision: str, uuid: bool) -> None: - b = Backup(self.path, self.log) + b = 
RbdRepository(self.path, self.log) for r in b.find_revisions(revision): if uuid: print(r.uuid) @@ -144,7 +142,7 @@ def find(self, revision: str, uuid: bool) -> None: print(r.filename) def forget(self, revision: str) -> None: - b = Backup(self.path, self.log) + b = RbdRepository(self.path, self.log) b.forget(revision) b.warn_pending_changes() @@ -152,19 +150,19 @@ def scheduler(self, config: Path) -> None: backy.daemon.main(config, self.log) def purge(self) -> None: - b = Backup(self.path, self.log) + b = RbdRepository(self.path, self.log) b.purge() def upgrade(self) -> None: - b = Backup(self.path, self.log) + b = RbdRepository(self.path, self.log) b.upgrade() def distrust(self, revision: str) -> None: - b = Backup(self.path, self.log) + b = RbdRepository(self.path, self.log) b.distrust(revision) def verify(self, revision: str) -> None: - b = Backup(self.path, self.log) + b = RbdRepository(self.path, self.log) b.verify(revision) def tags( @@ -345,9 +343,11 @@ def main(): "working directory." ), ) - p.add_argument("type", + p.add_argument( + "type", choices=list(backy.source.KNOWN_SOURCES), - help="Type of the source.") + help="Type of the source.", + ) subparsers = parser.add_subparsers() diff --git a/src/backy/cli/client.py b/src/backy/cli/client.py index ca3ecbe6..1aebf006 100644 --- a/src/backy/cli/client.py +++ b/src/backy/cli/client.py @@ -25,10 +25,10 @@ def status(self, filter_re: Optional[Pattern[str]] = None) -> List[StatusDict]: for job in list(self.jobs.values()): if filter_re and not filter_re.search(job.name): continue - job.backup.scan() + job.repository.scan() manual_tags = set() unsynced_revs = 0 - history = job.backup.clean_history + history = job.repository.clean_history for rev in history: manual_tags |= filter_manual_tags(rev.tags) if rev.pending_changes: @@ -55,9 +55,11 @@ def status(self, filter_re: Optional[Pattern[str]] = None) -> List[StatusDict]: else None ), manual_tags=", ".join(manual_tags), - quarantine_reports=len(job.backup.quarantine.report_ids), + quarantine_reports=len(job.repository.quarantine.report_ids), unsynced_revs=unsynced_revs, - local_revs=len(job.backup.get_history(clean=True, local=True)), + local_revs=len( + job.repository.get_history(clean=True, local=True) + ), ) ) return result diff --git a/src/backy/cli/tests/test_client.py b/src/backy/cli/tests/test_client.py index ce8efa4e..470a8886 100644 --- a/src/backy/cli/tests/test_client.py +++ b/src/backy/cli/tests/test_client.py @@ -272,7 +272,7 @@ async def test_cli_check_ok(daemon, cli_client): async def test_cli_check_too_old(daemon, clock, cli_client, log): job = daemon.jobs["test01"] - revision = Revision.create(job.backup, set(), log) + revision = Revision.create(job.repository, set(), log) revision.timestamp = utils.now() - datetime.timedelta(hours=48) revision.stats["duration"] = 60.0 revision.materialize() @@ -298,7 +298,7 @@ async def test_cli_check_too_old(daemon, clock, cli_client, log): async def test_cli_check_manual_tags(daemon, cli_client, log): job = daemon.jobs["test01"] - revision = Revision.create(job.backup, {"manual:test"}, log) + revision = Revision.create(job.repository, {"manual:test"}, log) revision.stats["duration"] = 60.0 revision.materialize() @@ -323,7 +323,7 @@ async def test_cli_check_manual_tags(daemon, cli_client, log): async def test_cli_check_quarantine(daemon, cli_client, log): job = daemon.jobs["test01"] - job.backup.quarantine.add_report(QuarantineReport(b"a", b"b", 0)) + job.repository.quarantine.add_report(QuarantineReport(b"a", b"b", 0)) utils.log_data = 
"" try: diff --git a/src/backy/conftest.py b/src/backy/conftest.py index f94a30af..77129f36 100644 --- a/src/backy/conftest.py +++ b/src/backy/conftest.py @@ -6,12 +6,21 @@ import pytest import structlog +import tzlocal import backy.logging import backy.schedule from backy import utils +@pytest.fixture +def tz_berlin(monkeypatch): + """Fix time zone to gain independece from runtime environment.""" + monkeypatch.setattr( + tzlocal, "get_localzone", lambda: ZoneInfo("Europe/Berlin") + ) + + @pytest.fixture(autouse=True, scope="session") def fix_pytest_coverage_465(): if "COV_CORE_SOURCE" in os.environ: diff --git a/src/backy/daemon/__init__.py b/src/backy/daemon/__init__.py index 5b4c1dc9..f72c5479 100644 --- a/src/backy/daemon/__init__.py +++ b/src/backy/daemon/__init__.py @@ -43,7 +43,7 @@ class BackyDaemon(object): config: dict schedules: dict[str, Schedule] jobs: dict[str, Job] - dead_backups: dict[str, Repository] + dead_repositories: dict[str, Repository] backup_semaphores: dict[str, asyncio.BoundedSemaphore] log: BoundLogger @@ -59,7 +59,7 @@ def __init__(self, config_file: Path, log: BoundLogger): self.schedules = {} self.backup_semaphores = {} self.jobs = {} - self.dead_backups = {} + self.dead_repositories = {} self._lock = None self.reload_api = asyncio.Event() self.api_addrs = ["::1", "127.0.0.1"] @@ -137,12 +137,12 @@ def _apply_config(self): del self.jobs[name] self.log.info("deleted-job", job_name=name) - self.dead_backups.clear() + self.dead_repositories.clear() for b in os.scandir(self.base_dir): if b.name in self.jobs or not b.is_dir(follow_symlinks=False): continue try: - self.dead_backups[b.name] = Repository( + self.dead_repositories[b.name] = Repository( self.base_dir / b.name, self.log.bind(job_name=b.name), ) @@ -321,7 +321,7 @@ async def purge_pending_backups(self): ): continue self.log.info("purging-pending", job=candidate.name) - await Job(self, candidate.name, self.log).run_purge() + await Job(self, candidate.name, self.log).run_gc() self.log.info("purge-pending-finished") except Exception: self.log.exception("purge-pending") @@ -338,10 +338,10 @@ def status( for job in list(self.jobs.values()): if filter_re and not filter_re.search(job.name): continue - job.backup.scan() + job.repository.scan() manual_tags = set() unsynced_revs = 0 - history = job.backup.clean_history + history = job.repository.clean_history for rev in history: manual_tags |= filter_manual_tags(rev.tags) if rev.pending_changes: @@ -370,10 +370,10 @@ def status( else None ), manual_tags=", ".join(manual_tags), - problem_reports=job.backup.problem_reports, + problem_reports=job.repository.problem_reports, unsynced_revs=unsynced_revs, local_revs=len( - job.backup.get_history(clean=True, local=True) + job.repository.get_history(clean=True, local=True) ), ) ) diff --git a/src/backy/daemon/api.py b/src/backy/daemon/api.py index 1e0eabc4..87b5cc02 100644 --- a/src/backy/daemon/api.py +++ b/src/backy/daemon/api.py @@ -183,18 +183,18 @@ async def run_job(self, request: web.Request): async def list_backups(self, request: web.Request): request["log"].info("list-backups") - return to_json(list(self.daemon.dead_backups.keys())) + return to_json(list(self.daemon.dead_repositories.keys())) async def get_backup( self, request: web.Request, allow_active: bool ) -> Repository: name = request.match_info.get("backup_name") request["log"].info("get-backups", name=name) - if name in self.daemon.dead_backups: - return self.daemon.dead_backups[name] + if name in self.daemon.dead_repositories: + return 
self.daemon.dead_repositories[name] if name in self.daemon.jobs: if allow_active: - return self.daemon.jobs[name].backup + return self.daemon.jobs[name].repository request["log"].info("get-backups-forbidden", name=name) raise HTTPForbidden() request["log"].info("get-backups-not-found", name=name) @@ -389,7 +389,7 @@ async def get_revs( async def put_tags(self, rev: Revision, autoremove: bool = False): async with self.session.put( - f"/v1/backups/{rev.backup.name}/revs/{rev.uuid}/tags", + f"/v1/backups/{rev.repository.name}/revs/{rev.uuid}/tags", json={"old_tags": list(rev.orig_tags), "new_tags": list(rev.tags)}, params={"autoremove": int(autoremove)}, ): diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index 5b5f23ac..bba31e01 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -16,8 +16,8 @@ from structlog.stdlib import BoundLogger import backy.utils +from backy.ext_deps import BACKY_CLI_CMD, BACKY_RBD_CMD from backy.repository import Repository -from backy.ext_deps import BACKY_CMD from backy.revision import Revision from backy.schedule import Schedule from backy.utils import ( @@ -31,6 +31,7 @@ if TYPE_CHECKING: from backy.daemon import BackyDaemon + from backy.repository import StatusDict class Job(object): @@ -41,7 +42,7 @@ class Job(object): next_time: Optional[datetime.datetime] = None next_tags: Optional[set[str]] = None path: Path - backup: Repository + repository: Repository logfile: Path last_config: Optional[dict] = None daemon: "BackyDaemon" @@ -65,7 +66,7 @@ def configure(self, config: dict) -> None: self.source = config["source"] self.schedule_name = config["schedule"] self.update_config() - self.backup = Backup(self.path, self.log) + self.repository = Repository(self.path, self.log) self.last_config = config @property @@ -92,9 +93,9 @@ def sla(self) -> bool: @property def sla_overdue(self) -> int: """Amount of time the SLA is currently overdue.""" - if not self.backup.clean_history: + if not self.repository.clean_history: return 0 - age = backy.utils.now() - self.backup.clean_history[-1].timestamp + age = backy.utils.now() - self.repository.clean_history[-1].timestamp max_age = min(x["interval"] for x in self.schedule.schedule.values()) if age > max_age * 1.5: return age.total_seconds() @@ -146,15 +147,14 @@ async def _wait_for_leader(self, next_time: datetime.datetime) -> bool: try: api = ClientManager(self.daemon.peers, self.taskid, self.log) statuses = await asyncio.gather( - *[ - api[server].fetch_status(f"^{self.name}$") - for server in api - ], + *[api[server].fetch_status(f"^{self.name}$") for server in api], return_exceptions=True, ) leader = None - leader_revs = len(self.backup.get_history(clean=True, local=True)) - leader_status: "backy.repository.StatusDict" + leader_revs = len( + self.repository.get_history(clean=True, local=True) + ) + leader_status: "StatusDict" self.log.info("local-revs", local_revs=leader_revs) for server, status in zip(api, statuses): log = self.log.bind(server=server) @@ -234,14 +234,12 @@ async def run_forever(self) -> None: self.log.debug("loop-started") while True: self.taskid = generate_taskid() - self.log = self.log.bind( - job_name=self.name, sub_taskid=self.taskid - ) + self.log = self.log.bind(job_name=self.name, sub_taskid=self.taskid) - self.backup = Backup(self.path, self.log) + self.repository = Repository(self.path, self.log) next_time, next_tags = self.schedule.next( - backy.utils.now(), self.spread, self.backup + backy.utils.now(), self.spread, self.repository ) if 
self.errors: @@ -273,13 +271,15 @@ async def run_forever(self) -> None: if not run_immediately and await self._wait_for_leader( next_time ): - await self.pull_metadata() + await self.repository.run_with_backup_lock( + self.pull_metadata, self.daemon.peers, self.taskid + ) await self.run_callback() else: speed = "slow" if ( - self.backup.clean_history - and self.backup.clean_history[-1].stats["duration"] + self.repository.clean_history + and self.repository.clean_history[-1].stats["duration"] < 600 ): speed = "fast" @@ -288,11 +288,17 @@ async def run_forever(self) -> None: async with self.daemon.backup_semaphores[speed]: self.update_status(f"running ({speed})") + self.repository._clean() await self.run_backup(next_tags) - await self.pull_metadata() + self.repository.scan() + await self.repository.run_with_backup_lock( + self.pull_metadata, self.daemon.peers, self.taskid + ) await self.run_expiry() - await self.push_metadata() - await self.run_purge() + await self.repository.run_with_backup_lock( + self.push_metadata, self.daemon.peers, self.taskid + ) + await self.run_gc() await self.run_callback() except asyncio.CancelledError: raise @@ -317,16 +323,16 @@ async def run_forever(self) -> None: async def run_backup(self, tags: Set[str]) -> None: self.log.info("backup-started", tags=", ".join(tags)) + r = Revision.create(self.repository, tags, self.log) + r.materialize() proc = await asyncio.create_subprocess_exec( - BACKY_CMD, + BACKY_RBD_CMD, "-t", self.taskid, "-b", str(self.path), - "-l", - str(self.logfile), "backup", - ",".join(tags), + r.uuid, close_fds=True, start_new_session=True, # Avoid signal propagation like Ctrl-C stdin=subprocess.DEVNULL, @@ -354,51 +360,18 @@ async def run_backup(self, tags: Set[str]) -> None: async def run_expiry(self) -> None: self.log.info("expiry-started") - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - self.path, - "-l", - self.logfile, - "expire", - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "expiry-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - if return_code: - raise RuntimeError( - f"Expiry failed with return code {return_code}" - ) - except asyncio.CancelledError: - self.log.warning("expiry-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise + # includes lock and repository.scan() + self.repository.expire() - async def run_purge(self) -> None: - self.log.info("purge-started") + async def run_gc(self) -> None: + self.log.info("gc-started") proc = await asyncio.create_subprocess_exec( - BACKY_CMD, + BACKY_RBD_CMD, "-t", self.taskid, "-b", str(self.path), - "-l", - str(self.logfile), - "purge", + "gc", # start_new_session=True, # Avoid signal propagation like Ctrl-C. 
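The subprocess handling above (here in `run_gc`, and likewise in `run_backup`) follows one pattern: spawn the helper with `asyncio.create_subprocess_exec`, await its exit code, and forward task cancellation by terminating the child. In isolation, with a placeholder command standing in for `backy-rbd`:

    import asyncio
    import subprocess

    async def run_child() -> int:
        # placeholder command; the scheduler execs BACKY_RBD_CMD here
        proc = await asyncio.create_subprocess_exec(
            "sleep", "1",
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        try:
            return await proc.wait()
        except asyncio.CancelledError:
            try:
                proc.terminate()  # forward cancellation to the child
            except ProcessLookupError:
                pass
            raise

    print(asyncio.run(run_child()))  # 0 on success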
# close_fds=True, stdin=subprocess.DEVNULL, @@ -408,12 +381,12 @@ async def run_purge(self) -> None: try: return_code = await proc.wait() self.log.info( - "purge-finished", + "gc-finished", return_code=return_code, subprocess_pid=proc.pid, ) except asyncio.CancelledError: - self.log.warning("purge-cancelled", subprocess_pid=proc.pid) + self.log.warning("gc-cancelled", subprocess_pid=proc.pid) try: proc.terminate() except ProcessLookupError: @@ -427,8 +400,9 @@ async def run_callback(self) -> None: self.log.info("callback-started") read, write = os.pipe() + # TODO backy_proc = await asyncio.create_subprocess_exec( - BACKY_CMD, + BACKY_CLI_CMD, "-b", str(self.path), "-l", @@ -492,10 +466,9 @@ def stop(self) -> None: self._task = None self.update_status("") - @Repository.locked(target=".backup", mode="exclusive") async def push_metadata(self, peers, taskid: str) -> int: grouped = defaultdict(list) - for r in self.backup.clean_history: + for r in self.repository.clean_history: if r.pending_changes: grouped[r.server].append(r) self.log.info( @@ -558,10 +531,9 @@ async def _push_metadata(self, api: Client, revs: List[Revision]) -> bool: error = True return error - @Repository.locked(target=".backup", mode="exclusive") async def pull_metadata(self, peers: dict, taskid: str) -> int: async def remove_dead_peer(): - for r in list(self.backup.history): + for r in list(self.repository.history): if r.server and r.server not in peers: self.log.info( "pull-removing-dead-peer", @@ -585,7 +557,7 @@ async def _pull_metadata(self, api: Client) -> bool: log = self.log.bind(server=api.server_name) try: await api.touch_backup(self.name) - remote_revs = await api.get_revs(self) + remote_revs = await api.get_revs(self.repository) log.debug("pull-found-revs", revs=len(remote_revs)) except ClientResponseError as e: if e.status in [ @@ -606,16 +578,21 @@ async def _pull_metadata(self, api: Client) -> bool: remote_revs = [] local_uuids = { - r.uuid for r in self.backup.history if r.server == api.server_name + r.uuid + for r in self.repository.history + if r.server == api.server_name } remote_uuids = {r.uuid for r in remote_revs} for uuid in local_uuids - remote_uuids: log.warning("pull-removing-unknown-rev", rev_uuid=uuid) - self.backup.find_by_uuid(uuid).remove(force=True) + self.repository.find_by_uuid(uuid).remove(force=True) for r in remote_revs: if r.uuid in local_uuids: - if r.to_dict() == self.backup.find_by_uuid(r.uuid).to_dict(): + if ( + r.to_dict() + == self.repository.find_by_uuid(r.uuid).to_dict() + ): continue log.debug("pull-updating-rev", rev_uid=r.uuid) else: diff --git a/src/backy/daemon/tests/test_api.py b/src/backy/daemon/tests/test_api.py index fc7376b5..b0796862 100644 --- a/src/backy/daemon/tests/test_api.py +++ b/src/backy/daemon/tests/test_api.py @@ -140,7 +140,7 @@ async def test_remove_peer(daemons, log): ds = await daemons(2) j0 = ds[0].jobs["test01"] - b0 = j0.backup + b0 = j0.repository rev0 = create_rev(b0, log) assert [r.uuid for r in b0.history] == [rev0.uuid] @@ -159,11 +159,11 @@ async def test_remove_remote_backup(daemons, log): ds = await daemons(2) j0 = ds[0].jobs["test01"] - b0 = j0.backup + b0 = j0.repository rev0 = create_rev(b0, log) j1 = ds[1].jobs["test01"] - b1 = j1.backup + b1 = j1.repository rev1 = create_rev(b1, log) assert [r.uuid for r in b0.history] == [rev0.uuid] @@ -194,11 +194,11 @@ async def test_simple_sync(daemons, log): ds = await daemons(3) j0 = ds[0].jobs["test01"] - b0 = j0.backup + b0 = j0.repository rev0 = create_rev(b0, log) j1 = ds[1].jobs["test01"] - 
b1 = j1.backup + b1 = j1.repository rev1 = create_rev(b1, log) # ignore offline servers @@ -214,7 +214,7 @@ async def test_simple_sync(daemons, log): assert [r.uuid for r in b0.history] == [rev0.uuid, rev1.uuid] new_rev1 = b0.history[1] - assert new_rev1.backup == b0 + assert new_rev1.repository == b0 assert new_rev1.timestamp == rev1.timestamp assert new_rev1.stats == rev1.stats assert new_rev1.tags == rev1.tags @@ -226,14 +226,14 @@ async def test_simple_sync(daemons, log): rev1.distrust() rev1.tags = {"manual:new"} rev1.write_info() - rev1.backup.scan() + rev1.repository.scan() await j0.pull_metadata() b0.scan() assert [r.uuid for r in b0.history] == [rev0.uuid, rev1.uuid] new_rev1 = b0.history[1] - assert new_rev1.backup == b0 + assert new_rev1.repository == b0 assert new_rev1.timestamp == rev1.timestamp assert new_rev1.backend_type == "" assert new_rev1.stats == rev1.stats @@ -244,7 +244,7 @@ async def test_simple_sync(daemons, log): # mark rev for deletion new_rev1.remove() - new_rev1.backup.scan() + new_rev1.repository.scan() assert [r.uuid for r in b0.history] == [rev0.uuid, rev1.uuid] assert new_rev1.tags == set() assert new_rev1.orig_tags == rev1.tags @@ -280,7 +280,7 @@ async def test_split_brain(daemons, log): await modify_authtokens(ds, [0, 1], [2, 3], allow=False, bidirectional=True) js = [d.jobs["test01"] for d in ds] - bs = [j.backup for j in js] + bs = [j.repository for j in js] revs = [create_rev(b, log) for b in bs] for b, r in zip(bs, revs): @@ -288,7 +288,7 @@ async def test_split_brain(daemons, log): for j in js: await j.pull_metadata() - j.backup.scan() + j.repository.scan() del ds[0].config["jobs"]["test01"] ds[0]._apply_config() @@ -388,7 +388,7 @@ async def null_coroutine(*args, delay=0.1, **kw): await asyncio.sleep(delay) async def run_backup(job, tags, delta=datetime.timedelta()): - r = Revision.create(job.backup, tags, log) + r = Revision.create(job.repository, tags, log) r.timestamp = backy.utils.now() + delta r.stats["duration"] = 1 r.write_info() diff --git a/src/backy/tests/test_callback.sh b/src/backy/daemon/tests/test_callback.sh similarity index 100% rename from src/backy/tests/test_callback.sh rename to src/backy/daemon/tests/test_callback.sh diff --git a/src/backy/daemon/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py index 40e0cddc..031b8b67 100644 --- a/src/backy/daemon/tests/test_daemon.py +++ b/src/backy/daemon/tests/test_daemon.py @@ -150,9 +150,9 @@ async def test_run_backup(daemon, log): job = daemon.jobs["test01"] await job.run_backup({"manual:asdf"}) - job.backup.scan() - assert len(job.backup.history) == 1 - revision = job.backup.history[0] + job.repository.scan() + assert len(job.repository.history) == 1 + revision = job.repository.history[0] assert revision.tags == {"manual:asdf"} backend = ChunkedFileBackend(revision, log) with backend.open("r") as f: @@ -161,9 +161,9 @@ async def test_run_backup(daemon, log): # Run again. This also covers the code path that works if # the target backup directory exists already. await job.run_backup({"manual:asdf"}) - job.backup.scan() - assert len(job.backup.history) == 2 - revision = job.backup.history[1] + job.repository.scan() + assert len(job.repository.history) == 2 + revision = job.repository.history[1] assert revision.tags == {"manual:asdf"} backend = ChunkedFileBackend(revision, log) with backend.open("r") as f: @@ -203,7 +203,7 @@ def test_sla_before_first_backup(daemon): # I agree that this gives us a blind spot in the beginning. I'll # think of something when this happens. 
Maybe keeping a log of errors # or so to notice that we tried previously. - assert len(job.backup.history) == 0 + assert len(job.repository.history) == 0 assert job.sla is True @@ -213,31 +213,31 @@ def test_sla_over_time(daemon, clock, tmp_path, log): # I agree that this gives us a blind spot in the beginning. I'll # think of something when this happens. Maybe keeping a log of errors # or so to notice that we tried previously. - revision = Revision.create(job.backup, set(), log) + revision = Revision.create(job.repository, set(), log) # We're on a 24h cycle. 6 hours old backup is fine. revision.timestamp = utils.now() - datetime.timedelta(hours=6) revision.stats["duration"] = 60.0 revision.materialize() - job.backup.scan() - assert len(job.backup.history) == 1 + job.repository.scan() + assert len(job.repository.history) == 1 assert job.sla is True # 24 hours is also fine. revision.timestamp = utils.now() - datetime.timedelta(hours=24) revision.write_info() - job.backup.scan() + job.repository.scan() assert job.sla is True # 32 hours is also fine. revision.timestamp = utils.now() - datetime.timedelta(hours=32) revision.write_info() - job.backup.scan() + job.repository.scan() assert job.sla is True # 24*1.5 hours is the last time that is OK. revision.timestamp = utils.now() - datetime.timedelta(hours=24 * 1.5) revision.write_info() - job.backup.scan() + job.repository.scan() assert job.sla is True # 1 second later we consider this not to be good any longer. @@ -247,26 +247,26 @@ def test_sla_over_time(daemon, clock, tmp_path, log): - datetime.timedelta(seconds=1) ) revision.write_info() - job.backup.scan() + job.repository.scan() assert job.sla is False # a running backup does not influence this. job.update_status("running (slow)") - r = Revision.create(job.backup, {"daily"}, log) + r = Revision.create(job.repository, {"daily"}, log) r.write_info() assert job.sla is False def test_incomplete_revs_dont_count_for_sla(daemon, clock, tmp_path, log): job = daemon.jobs["test01"] - r1 = Revision.create(job.backup, set(), log) + r1 = Revision.create(job.repository, set(), log) r1.timestamp = utils.now() - datetime.timedelta(hours=48) r1.stats["duration"] = 60.0 r1.materialize() - r2 = Revision.create(job.backup, set(), log) + r2 = Revision.create(job.repository, set(), log) r2.timestamp = utils.now() - datetime.timedelta(hours=1) r2.materialize() - job.backup.scan() + job.repository.scan() assert False is job.sla @@ -427,7 +427,7 @@ async def test_purge_pending(daemon, monkeypatch): "asyncio.sleep", mock.Mock(side_effect=asyncio.CancelledError()) ) - daemon.jobs["test01"].backup.set_purge_pending() + daemon.jobs["test01"].repository.set_purge_pending() del daemon.jobs["test01"] with pytest.raises(asyncio.CancelledError): diff --git a/src/backy/ext_deps.py b/src/backy/ext_deps.py index cc2ce7d9..27094000 100644 --- a/src/backy/ext_deps.py +++ b/src/backy/ext_deps.py @@ -6,10 +6,14 @@ import os import sys -BACKY_CMD = os.environ.get( - "BACKY_CMD", +BACKY_CLI_CMD = os.environ.get( + "BACKY_CLI_CMD", os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), "backy"), ) +BACKY_RBD_CMD = os.environ.get( + "BACKY_RBD_CMD", + os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), "backy-rbd"), +) CP = os.environ.get("BACKY_CP", "cp") RBD = os.environ.get("BACKY_RBD", "rbd") BACKY_EXTRACT = os.environ.get("BACKY_EXTRACT", "backy-extract") diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index 4e2d3d96..3fadefd4 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ 
-3,25 +3,25 @@ import os import sys from pathlib import Path -from structlog.stdlib import BoundLogger +from typing import TYPE_CHECKING import structlog +from structlog.stdlib import BoundLogger from backy.utils import generate_taskid from .. import logging -from typing import TYPE_CHECKING - if TYPE_CHECKING: from backy.repository import Repository + class FileSource: type_ = "file" @classmethod - def init(cls, repository: Repository, log: BoundLogger): - return {'type': self.type_} + def init(cls, repository: "Repository", log: BoundLogger): + return {"type": cls.type_} def main(): @@ -110,7 +110,7 @@ def main(): log.debug("invoked", args=" ".join(sys.argv)) try: - b = RbdBackup(args.backupdir, log) + b = RbdRepository(args.backupdir, log) # XXX scheduler? b._clean() ret = 0 diff --git a/src/backy/logging.py b/src/backy/logging.py index 223a77db..561a9dad 100644 --- a/src/backy/logging.py +++ b/src/backy/logging.py @@ -118,9 +118,7 @@ def __getattr__(self, name): def prefix(prefix, line): - return "{}>\t".format(prefix) + line.replace( - "\n", "\n{}>\t".format(prefix) - ) + return "{}>\t".format(prefix) + line.replace("\n", "\n{}>\t".format(prefix)) class ConsoleFileRenderer: @@ -145,9 +143,7 @@ def __init__(self, min_level, pad_event=_EVENT_WIDTH): self.min_level = self.LEVELS.index(min_level.lower()) if colorama is None: print( - _MISSING.format( - who=self.__class__.__name__, package="colorama" - ) + _MISSING.format(who=self.__class__.__name__, package="colorama") ) if COLORIZED_TTY_OUTPUT: colorama.init() @@ -216,10 +212,7 @@ def write(line): level = event_dict.pop("level", None) if level is not None: write( - self._level_to_color[level] - + level[0].upper() - + RESET_ALL - + " " + self._level_to_color[level] + level[0].upper() + RESET_ALL + " " ) job_name = event_dict.pop("job_name", "-") diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index 7f3fc477..cc897554 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -8,7 +8,7 @@ from backy.utils import generate_taskid from .. import logging -from .backup import RbdSource, RestoreBackend +from .backup import RbdRepository, RestoreBackend def main(): @@ -98,9 +98,7 @@ def main(): log.debug("invoked", args=" ".join(sys.argv)) try: - b = RbdSource(args.backupdir, log) - # XXX scheduler? - b._clean() + b = RbdRepository(args.backupdir, log) ret = 0 match args.func: case "backup": diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/backup.py index 61aa9cfe..b1b9579e 100644 --- a/src/backy/rbd/backup.py +++ b/src/backy/rbd/backup.py @@ -12,7 +12,6 @@ from ..ext_deps import BACKY_EXTRACT from ..repository import Repository from ..revision import Revision, Trust -from ..source import Source from ..utils import CHUNK_SIZE, copy, posix_fadvise from .chunked import ChunkedFileBackend from .chunked.chunk import BackendException @@ -42,7 +41,7 @@ def __str__(self): return self.value -class RbdSource(Source): +class RbdRepository(Repository): """A backup of a VM. 
Provides access to methods to @@ -54,13 +53,8 @@ class RbdSource(Source): source: BackySourceFactory quarantine: QuarantineStore - _lock_fds: dict[str, IO] - def __init__(self, path: Path, log: BoundLogger): super().__init__(path, log) - self._lock_fds = {} - - self.scan() # Initialize our source try: @@ -87,8 +81,8 @@ def problem_reports(self): @Repository.locked(target=".backup", mode="exclusive") @Repository.locked(target=".purge", mode="shared") - def backup(self, revision: str) -> bool: - new_revision = self.find_by_uuid(revision) + def backup(self, rev_uuid: str) -> bool: + new_revision = self.find_by_uuid(rev_uuid) self.prevent_remote_rev([new_revision]) self.path.joinpath("last").unlink(missing_ok=True) diff --git a/src/backy/rbd/chunked/__init__.py b/src/backy/rbd/chunked/__init__.py index 09a86555..446848f4 100644 --- a/src/backy/rbd/chunked/__init__.py +++ b/src/backy/rbd/chunked/__init__.py @@ -6,6 +6,7 @@ from backy.revision import Revision, Trust from backy.utils import END, report_status +from ...repository import Repository from .chunk import Chunk, Hash from .file import File from .store import Store @@ -14,13 +15,17 @@ class ChunkedFileBackend: # multiple Backends may share the same store STORES: dict[Path, Store] = dict() + repository: Repository + revision: Revision + store: Store + log: BoundLogger def __init__(self, revision: Revision, log: BoundLogger): - self.backup = revision.backup + self.repository = revision.repository self.revision = revision - path = self.backup.path / "chunks" + path = self.repository.path / "chunks" if path not in self.STORES: - self.STORES[path] = Store(self.backup.path / "chunks", log) + self.STORES[path] = Store(self.repository.path / "chunks", log) self.store = self.STORES[path] self.log = log.bind(subsystem="chunked") @@ -38,7 +43,7 @@ def open(self, mode: str = "rb", parent: Optional[Revision] = None) -> File: # overlay = True file = File(self.revision.filename, self.store, mode, overlay) - if file.writable() and self.backup.contains_distrusted: + if file.writable() and self.repository.contains_distrusted: # "Force write"-mode if any revision is distrusted. 
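# (Note, not part of the patch: force_writes is understood to make the
# shared Store rewrite chunks even when a chunk with the same hash already
# exists, so data inherited from a distrusted revision is re-validated on
# the next write pass.)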
self.log.warn("forcing-full") self.store.force_writes = True @@ -48,7 +53,7 @@ def open(self, mode: str = "rb", parent: Optional[Revision] = None) -> File: # def purge(self) -> None: self.log.debug("purge") used_chunks: Set[Hash] = set() - for revision in self.backup.local_history: + for revision in self.repository.local_history: used_chunks.update( type(self)(revision, self.log).open()._mapping.values() ) @@ -61,7 +66,7 @@ def verify(self): verified_chunks: Set[Hash] = set() # Load verified chunks to avoid duplicate work - for revision in self.backup.get_history(clean=True, local=True): + for revision in self.repository.get_history(clean=True, local=True): if revision.trust != Trust.VERIFIED: continue verified_chunks.update( diff --git a/src/backy/rbd/chunked/chunk.py b/src/backy/rbd/chunked/chunk.py index 950120cf..03fa9b4d 100644 --- a/src/backy/rbd/chunked/chunk.py +++ b/src/backy/rbd/chunked/chunk.py @@ -2,7 +2,7 @@ import io import os import tempfile -from typing import Optional, Tuple, TypeAlias +from typing import TYPE_CHECKING, Optional, Tuple, TypeAlias import lzo import mmh3 @@ -10,6 +10,9 @@ import backy.rbd.chunked from backy.utils import posix_fadvise +if TYPE_CHECKING: + from backy.rbd.chunked import Store + Hash: TypeAlias = str chunk_stats = { @@ -39,13 +42,13 @@ class Chunk(object): CHUNK_SIZE = 4 * 1024**2 # 4 MiB chunks hash: Optional[Hash] - store: "backy.backends.chunked.Store" + store: "Store" clean: bool data: Optional[io.BytesIO] def __init__( self, - store: "backy.backends.chunked.Store", + store: "Store", hash: Optional[Hash], ): self.hash = hash diff --git a/src/backy/rbd/chunked/file.py b/src/backy/rbd/chunked/file.py index 48a5e168..29f361bd 100644 --- a/src/backy/rbd/chunked/file.py +++ b/src/backy/rbd/chunked/file.py @@ -4,12 +4,15 @@ import os.path import time from collections import defaultdict -from typing import Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple import backy.rbd.chunked from .chunk import Chunk, Hash +if TYPE_CHECKING: + from backy.rbd.chunked import Store + class File(object): """A file like class that stores its data in 4MiB chunks @@ -32,7 +35,7 @@ class File(object): flush_target = 10 name: str - store: "backy.backends.chunked.Store" + store: "Store" closed: bool overlay: bool size: int @@ -46,7 +49,7 @@ class File(object): def __init__( self, name: str | os.PathLike, - store: "backy.backends.chunked.Store", + store: "Store", mode: str = "rw", overlay: bool = False, ): @@ -162,9 +165,7 @@ def seek(self, offset: int, whence=io.SEEK_SET) -> int: elif whence == io.SEEK_CUR: position = position + offset else: - raise ValueError( - "`whence` does not support mode {}".format(whence) - ) + raise ValueError("`whence` does not support mode {}".format(whence)) if position < 0: raise ValueError("Can not seek before the beginning of a file.") diff --git a/src/backy/rbd/chunked/tests/test_backend.py b/src/backy/rbd/chunked/tests/test_backend.py index 52c49cd1..858fd956 100644 --- a/src/backy/rbd/chunked/tests/test_backend.py +++ b/src/backy/rbd/chunked/tests/test_backend.py @@ -2,8 +2,8 @@ from backy.revision import Revision -def test_overlay(rbdbackup, log): - r = Revision.create(rbdbackup, set(), log) +def test_overlay(rbdrepository, log): + r = Revision.create(rbdrepository, set(), log) backend = ChunkedFileBackend(r, log) # Write 1 version to the file f = backend.open("w") @@ -25,20 +25,20 @@ def test_overlay(rbdbackup, log): f.close() -def test_purge(rbdbackup, log): - r = Revision.create(rbdbackup, set(), log) +def 
test_purge(rbdrepository, log): + r = Revision.create(rbdrepository, set(), log) backend = ChunkedFileBackend(r, log) # Write 1 version to the file f = backend.open("w") f.write(b"asdf") f.close() r.materialize() - remote = Revision(rbdbackup, log) # remote revision without local data + remote = Revision(rbdrepository, log) # remote revision without local data remote.server = "remote" remote.materialize() - rbdbackup.scan() + rbdrepository.scan() # Reassign as the scan will create a new reference - r = rbdbackup.history[0] + r = rbdrepository.history[0] assert len(list(backend.store.ls())) == 1 backend.purge() assert len(list(backend.store.ls())) == 1 diff --git a/src/backy/rbd/conftest.py b/src/backy/rbd/conftest.py index e0b021d2..e475bda5 100644 --- a/src/backy/rbd/conftest.py +++ b/src/backy/rbd/conftest.py @@ -3,14 +3,14 @@ import pytest -from backy.rbd import RbdSource +from backy.rbd import RbdRepository from backy.revision import Revision fixtures = os.path.dirname(__file__) + "/tests/samples" @pytest.fixture -def rbdbackup(schedule, tmp_path, log): +def rbdrepository(schedule, tmp_path, log): with open(str(tmp_path / "config"), "w", encoding="utf-8") as f: json.dump( { @@ -22,11 +22,11 @@ def rbdbackup(schedule, tmp_path, log): }, f, ) - return RbdSource(tmp_path, log) + return RbdRepository(tmp_path, log) -def create_rev(rbdbackup, tags): - r = Revision.create(rbdbackup, tags, rbdbackup.log) +def create_rev(rbdrepository, tags): + r = Revision.create(rbdrepository, tags, rbdrepository.log) r.materialize() - rbdbackup.scan() + rbdrepository.scan() return r diff --git a/src/backy/rbd/sources/__init__.py b/src/backy/rbd/sources/__init__.py index 9665fe62..9f632edf 100644 --- a/src/backy/rbd/sources/__init__.py +++ b/src/backy/rbd/sources/__init__.py @@ -6,16 +6,17 @@ import backy.revision if TYPE_CHECKING: - from backy.rbd import RbdSource + from backy.rbd import RbdRepository + from backy.rbd.chunked import ChunkedFileBackend class BackySource(ABC): @abstractmethod - def backup(self, target: "backy.backends.BackyBackend") -> None: + def backup(self, target: "ChunkedFileBackend") -> None: ... @abstractmethod - def verify(self, target: "backy.backends.BackyBackend") -> bool: + def verify(self, target: "ChunkedFileBackend") -> bool: ... @@ -31,7 +32,7 @@ def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): class BackySourceFactory(ABC): @abstractmethod def __init__( - self, config: dict, backup: "RbdBackup", log: BoundLogger + self, config: dict, repository: "RbdRepository", log: BoundLogger ) -> None: ... diff --git a/src/backy/rbd/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py index 2835f038..514e3b48 100644 --- a/src/backy/rbd/sources/ceph/source.py +++ b/src/backy/rbd/sources/ceph/source.py @@ -5,7 +5,7 @@ import backy.utils from backy.revision import Revision -from ... import RbdSource +from ... import RbdRepository from ...chunked import ChunkedFileBackend from ...quarantine import QuarantineReport from .. 
import BackySource, BackySourceContext, BackySourceFactory @@ -25,15 +25,17 @@ class CephRBD(BackySource, BackySourceFactory, BackySourceContext): log: BoundLogger rbd: RBDClient revision: Revision - rbdbackup: RbdSource + repository: RbdRepository - def __init__(self, config: dict, backup: RbdSource, log: BoundLogger): + def __init__( + self, config: dict, repository: RbdRepository, log: BoundLogger + ): self.pool = config["pool"] self.image = config["image"] self.always_full = config.get("full-always", False) self.log = log.bind(subsystem="ceph") self.rbd = RBDClient(self.log) - self.rbdbackup = backup + self.repository = repository def ready(self) -> bool: """Check whether the source can be backed up. @@ -140,7 +142,7 @@ def verify(self, target: ChunkedFileBackend) -> bool: return backy.utils.files_are_roughly_equal( source, target_, - report=lambda s, t, o: self.rbdbackup.quarantine.add_report( + report=lambda s, t, o: self.repository.quarantine.add_report( QuarantineReport(s, t, o) ), ) @@ -153,8 +155,10 @@ def _delete_old_snapshots(self) -> None: # full backups instead of new deltas based on the most recent valid # one. # XXX this will break if multiple servers are active - if not self.always_full and self.revision.backup.local_history: - keep_snapshot_revision = self.revision.backup.local_history[-1].uuid + if not self.always_full and self.revision.repository.local_history: + keep_snapshot_revision = self.revision.repository.local_history[ + -1 + ].uuid else: keep_snapshot_revision = None for snapshot in self.rbd.snap_ls(self._image_name): diff --git a/src/backy/rbd/sources/ceph/tests/test_ceph_source.py b/src/backy/rbd/sources/ceph/tests/test_ceph_source.py index 2f3687f8..13d7c32d 100644 --- a/src/backy/rbd/sources/ceph/tests/test_ceph_source.py +++ b/src/backy/rbd/sources/ceph/tests/test_ceph_source.py @@ -28,10 +28,10 @@ def check_output(monkeypatch): @pytest.fixture -def ceph_rbd_imagesource(rbdclient, rbdbackup, nosleep, log): +def ceph_rbd_imagesource(rbdclient, rbdrepository, nosleep, log): """Provides a CephRBD object configured for image pool/test, with rbd being mocked away and allowing snapshots on that image.""" - source = CephRBD(dict(pool="test", image="foo"), rbdbackup, log) + source = CephRBD(dict(pool="test", image="foo"), rbdrepository, log) # rbdclient mock setup: rbdclient._ceph_cli._register_image_for_snaps("test/foo") source.rbd = rbdclient @@ -54,12 +54,12 @@ def test_assign_revision(nosleep, log): assert context_manager.revision is revision -def test_context_manager(rbdbackup, ceph_rbd_imagesource, log): +def test_context_manager(rbdrepository, ceph_rbd_imagesource, log): """The imagesource context manager around a backup revision must create a corresponding snapshot at enter, and clean up at exit.""" source = ceph_rbd_imagesource - revision = Revision.create(rbdbackup, set(), log, uuid="1") + revision = Revision.create(rbdrepository, set(), log, uuid="1") with source(revision): assert source.rbd.snap_ls("test/foo")[0]["name"] == "backy-1" @@ -67,7 +67,7 @@ def test_context_manager(rbdbackup, ceph_rbd_imagesource, log): def test_context_manager_cleans_out_snapshots( - ceph_rbd_imagesource, rbdbackup, log + ceph_rbd_imagesource, rbdrepository, log ): """The imagesource context manager cleans up unexpected backy snapshot revisions. 
Snapshots without the prefix 'backy-' are left untouched.""" @@ -78,10 +78,10 @@ def test_context_manager_cleans_out_snapshots( # unexpected revision snapshots are cleaned source.rbd.snap_create("test/foo@backy-2") - revision = Revision.create(rbdbackup, set(), log, uuid="1") + revision = Revision.create(rbdrepository, set(), log, uuid="1") with source(revision): revision.materialize() - rbdbackup.scan() + rbdrepository.scan() assert source.rbd.snap_ls("test/foo") == [ { @@ -101,7 +101,7 @@ def test_context_manager_cleans_out_snapshots( ] -def test_choose_full_without_parent(ceph_rbd_imagesource, rbdbackup, log): +def test_choose_full_without_parent(ceph_rbd_imagesource, rbdrepository, log): """When backing up a revision without a parent, a full backup needs to happen. The diff function must not be called.""" source = ceph_rbd_imagesource @@ -109,7 +109,7 @@ def test_choose_full_without_parent(ceph_rbd_imagesource, rbdbackup, log): source.diff = mock.Mock() source.full = mock.Mock() - revision = Revision.create(rbdbackup, set(), log) + revision = Revision.create(rbdrepository, set(), log) with source(revision) as s: s.backup(ChunkedFileBackend(revision, log)) @@ -118,7 +118,7 @@ def test_choose_full_without_parent(ceph_rbd_imagesource, rbdbackup, log): assert source.full.called -def test_choose_full_without_snapshot(ceph_rbd_imagesource, rbdbackup, log): +def test_choose_full_without_snapshot(ceph_rbd_imagesource, rbdrepository, log): """When backing up a revision with an immediate parent that has no corresponding snapshot, that parent must be ignored and a full backup has to be made. The diff function must not be called.""" @@ -127,12 +127,12 @@ def test_choose_full_without_snapshot(ceph_rbd_imagesource, rbdbackup, log): source.diff = mock.Mock() source.full = mock.Mock() - revision1 = Revision.create(rbdbackup, set(), log) + revision1 = Revision.create(rbdrepository, set(), log) revision1.materialize() - rbdbackup.scan() + rbdrepository.scan() - revision2 = Revision.create(rbdbackup, set(), log) + revision2 = Revision.create(rbdrepository, set(), log) with source(revision2) as s: s.backup(ChunkedFileBackend(revision2, log)) @@ -141,7 +141,7 @@ def test_choose_full_without_snapshot(ceph_rbd_imagesource, rbdbackup, log): assert source.full.called -def test_choose_diff_with_snapshot(ceph_rbd_imagesource, rbdbackup, log): +def test_choose_diff_with_snapshot(ceph_rbd_imagesource, rbdrepository, log): """In an environment where a parent revision exists and has a snapshot, both revisions shall be diffed.""" source = ceph_rbd_imagesource @@ -149,16 +149,16 @@ def test_choose_diff_with_snapshot(ceph_rbd_imagesource, rbdbackup, log): source.diff = mock.Mock() source.full = mock.Mock() - revision1 = Revision.create(rbdbackup, set(), log, uuid="a1") + revision1 = Revision.create(rbdrepository, set(), log, uuid="a1") revision1.materialize() # part of test setup: we check backy's behavior when a previous version not only # exists, but also has a snapshot source.rbd.snap_create("test/foo@backy-a1") - rbdbackup.scan() + rbdrepository.scan() - revision2 = Revision.create(rbdbackup, set(), log) + revision2 = Revision.create(rbdrepository, set(), log) with source(revision2) as s: s.backup(ChunkedFileBackend(revision2, log)) @@ -167,7 +167,7 @@ def test_choose_diff_with_snapshot(ceph_rbd_imagesource, rbdbackup, log): assert not source.full.called -def test_diff_backup(ceph_rbd_imagesource, rbdbackup, tmp_path, log): +def test_diff_backup(ceph_rbd_imagesource, rbdrepository, tmp_path, log): """When doing a 
diff backup between two revisions with snapshot, the RBDDiff needs to be called properly, a snapshot for the new revision needs to be created and the snapshot of the previous revision needs to be removed after the successful backup.""" @@ -176,21 +176,21 @@ source = ceph_rbd_imagesource parent = Revision.create( - rbdbackup, set(), log, uuid="ed968696-5ab0-4fe0-af1c-14cadab44661" + rbdrepository, set(), log, uuid="ed968696-5ab0-4fe0-af1c-14cadab44661" ) parent.materialize() # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. revision = Revision.create( - rbdbackup, set(), log, uuid="f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" + rbdrepository, set(), log, uuid="f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" ) revision.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) with ChunkedFileBackend(parent, log).open("wb") as f: f.write(b"asdf") - rbdbackup.scan() + rbdrepository.scan() revision.materialize() # test setup: ensure that previous revision has a snapshot. It needs to be removed @@ -210,7 +210,7 @@ source.diff( ChunkedFileBackend(revision, log), revision.get_parent() ) - rbdbackup.history.append(revision) + rbdrepository.history.append(revision) export.assert_called_with( "test/foo@backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802", "backy-ed968696-5ab0-4fe0-af1c-14cadab44661", @@ -223,14 +223,14 @@ ) -def test_full_backup(ceph_rbd_imagesource, rbdbackup, tmp_path, log): +def test_full_backup(ceph_rbd_imagesource, rbdrepository, tmp_path, log): source = ceph_rbd_imagesource # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(rbdbackup, set(), log, uuid="a0") + revision = Revision.create(rbdrepository, set(), log, uuid="a0") revision.materialize() - rbdbackup.scan() + rbdrepository.scan() with mock.patch("backy.rbd.sources.ceph.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han likes Leia.") @@ -246,10 +246,10 @@ assert f.read() == b"Han likes Leia." # Now make another full backup. This overwrites the first. - revision2 = Revision.create(rbdbackup, set(), log, uuid="a1") + revision2 = Revision.create(rbdrepository, set(), log, uuid="a1") revision2.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) revision2.materialize() - rbdbackup.scan() + rbdrepository.scan() with mock.patch("backy.rbd.sources.ceph.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han loves Leia.") @@ -266,7 +266,7 @@ def test_full_backup_integrates_changes( - ceph_rbd_imagesource, rbdbackup, tmp_path, log + ceph_rbd_imagesource, rbdrepository, tmp_path, log ): # The backup source changes between two consecutive full backups.
Both # backup images should reflect the state of the source at the time the @@ -276,11 +276,11 @@ def test_full_backup_integrates_changes( content0 = BLOCK * b"A" + BLOCK * b"B" + BLOCK * b"C" + BLOCK * b"D" content1 = BLOCK * b"A" + BLOCK * b"X" + BLOCK * b"\0" + BLOCK * b"D" - rev0 = Revision.create(rbdbackup, set(), log) + rev0 = Revision.create(rbdrepository, set(), log) rev0.materialize() - rbdbackup.scan() + rbdrepository.scan() - rev1 = Revision.create(rbdbackup, set(), log) + rev1 = Revision.create(rbdrepository, set(), log) rev1.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) rev1.materialize() @@ -299,15 +299,15 @@ def test_full_backup_integrates_changes( assert content == f.read() -def test_verify_fail(rbdbackup, tmp_path, ceph_rbd_imagesource, log): +def test_verify_fail(rbdrepository, tmp_path, ceph_rbd_imagesource, log): source = ceph_rbd_imagesource # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(rbdbackup, set(), log) + revision = Revision.create(rbdrepository, set(), log) revision.materialize() - rbdbackup.scan() + rbdrepository.scan() rbd_source = str(tmp_path / "-dev-rbd0") with open(rbd_source, "w") as f: @@ -319,18 +319,18 @@ def test_verify_fail(rbdbackup, tmp_path, ceph_rbd_imagesource, log): # The backend has false data, so this needs to be detected. with source(revision): assert not source.verify(backend) - assert len(rbdbackup.quarantine.report_ids) == 1 + assert len(rbdrepository.quarantine.report_ids) == 1 -def test_verify(ceph_rbd_imagesource, rbdbackup, tmp_path, log): +def test_verify(ceph_rbd_imagesource, rbdrepository, tmp_path, log): source = ceph_rbd_imagesource # Those revision numbers are taken from the sample snapshot and need # to match, otherwise our diff integration will (correctly) complain. 
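# (Note, not part of the patch: verify() is expected to map the snapshot
# and compare it against the chunked backend; a detected mismatch is
# filed as a QuarantineReport on repository.quarantine, which is what the
# report_ids assertion at the end of this test counts.)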
- revision = Revision.create(rbdbackup, set(), log, uuid="a0") + revision = Revision.create(rbdrepository, set(), log, uuid="a0") revision.materialize() - rbdbackup.scan() + rbdrepository.scan() rbd_source = source.rbd.map("test/foo@backy-a0")["device"] with open(rbd_source, "wb") as f: diff --git a/src/backy/rbd/sources/file.py b/src/backy/rbd/sources/file.py index 9cd0f211..f8295889 100644 --- a/src/backy/rbd/sources/file.py +++ b/src/backy/rbd/sources/file.py @@ -1,7 +1,7 @@ from structlog.stdlib import BoundLogger import backy.rbd.chunked -from backy.rbd import RbdSource +from backy.rbd import RbdRepository from backy.rbd.quarantine import QuarantineReport from backy.rbd.sources import ( BackySource, @@ -16,11 +16,13 @@ class File(BackySource, BackySourceFactory, BackySourceContext): filename: str cow: bool revision: Revision - rbdbackup: RbdSource + repository: RbdRepository log: BoundLogger - def __init__(self, config: dict, backup: RbdSource, log: BoundLogger): - self.rbdbackup = backup + def __init__( + self, config: dict, repository: RbdRepository, log: BoundLogger + ): + self.repository = repository self.filename = config["filename"] self.cow = config.get("cow", True) self.log = log.bind(filename=self.filename, subsystem="file") @@ -67,7 +69,7 @@ def verify(self, target: "backy.rbd.chunked.ChunkedFileBackend") -> bool: return files_are_equal( source, target_, - report=lambda s, t, o: self.rbdbackup.quarantine.add_report( + report=lambda s, t, o: self.repository.quarantine.add_report( QuarantineReport(s, t, o) ), ) diff --git a/src/backy/rbd/sources/flyingcircus/source.py b/src/backy/rbd/sources/flyingcircus/source.py index f9063026..27ec39d3 100644 --- a/src/backy/rbd/sources/flyingcircus/source.py +++ b/src/backy/rbd/sources/flyingcircus/source.py @@ -7,18 +7,18 @@ from backy.utils import TimeOut, TimeOutError -from ... import RbdSource +from ... import RbdRepository from ..ceph.source import CephRBD class FlyingCircusRootDisk(CephRBD): snapshot_timeout = 90 - def __init__(self, config, backup: RbdSource, log: BoundLogger): + def __init__(self, config, repository: RbdRepository, log: BoundLogger): self.config = config self.vm = config["vm"] self.consul_acl_token = config.get("consul_acl_token") - super(FlyingCircusRootDisk, self).__init__(config, backup, log) + super(FlyingCircusRootDisk, self).__init__(config, repository, log) self.log = self.log.bind(vm=self.vm, subsystem="fc-disk") def create_snapshot(self, name: str) -> None: diff --git a/src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py b/src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py index 3a5b40bd..7e39b664 100644 --- a/src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py +++ b/src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py @@ -10,7 +10,7 @@ @pytest.fixture -def fcrd(log, rbdbackup): +def fcrd(log, rbdrepository): return FlyingCircusRootDisk( { "pool": "test", @@ -18,7 +18,7 @@ def fcrd(log, rbdbackup): "vm": "test01", "consul_acl_token": "12345", }, - rbdbackup, + rbdrepository, log, ) diff --git a/src/backy/rbd/tests/smoketest.sh b/src/backy/rbd/tests/smoketest.sh index 9d9e528b..3056fb14 100755 --- a/src/backy/rbd/tests/smoketest.sh +++ b/src/backy/rbd/tests/smoketest.sh @@ -3,13 +3,13 @@ set -e umask 022 -if [[ -z "$BACKY_CMD" ]]; then - echo "error: BACKY_CMD is not set. Set it manually or call via pytest" +if [[ -z "$BACKY_RBD_CMD" ]]; then + echo "error: BACKY_RBD_CMD is not set. 
Set it manually or call via pytest" exit 2 fi BACKUP=$(mktemp -d -t backy.test.XXXXX) -BACKY="$BACKY_CMD -l ${BACKUP}/backy.log" +BACKY="$BACKY_RBD_CMD -l ${BACKUP}/backy.log" export TZ=Europe/Berlin mkdir ${BACKUP}/backup diff --git a/src/backy/rbd/tests/test_backup.py b/src/backy/rbd/tests/test_backup.py index 594ef1e6..5e9263bd 100644 --- a/src/backy/rbd/tests/test_backup.py +++ b/src/backy/rbd/tests/test_backup.py @@ -8,66 +8,66 @@ from backy.utils import CHUNK_SIZE -def test_config(rbdbackup, tmp_path): - assert rbdbackup.path == tmp_path - assert isinstance(rbdbackup.source, File) - assert rbdbackup.source.filename == "input-file" +def test_config(rbdrepository, tmp_path): + assert rbdrepository.path == tmp_path + assert isinstance(rbdrepository.source, File) + assert rbdrepository.source.filename == "input-file" -def test_restore_target(rbdbackup, log): +def test_restore_target(rbdrepository, log): source = "input-file" target = "restore.img" with open(source, "wb") as f: f.write(b"volume contents\n") - r = create_rev(rbdbackup, {"daily"}) - rbdbackup.backup(r.uuid) - rbdbackup.restore(r.uuid, target) + r = create_rev(rbdrepository, {"daily"}) + rbdrepository.backup(r.uuid) + rbdrepository.restore(r.uuid, target) with open(source, "rb") as s, open(target, "rb") as t: assert s.read() == t.read() -def test_restore_stdout(rbdbackup, capfd, log): +def test_restore_stdout(rbdrepository, capfd, log): source = "input-file" with open(source, "wb") as f: f.write(b"volume contents\n") - r = create_rev(rbdbackup, {"daily"}) - rbdbackup.backup(r.uuid) - rbdbackup.restore(r.uuid, "-") + r = create_rev(rbdrepository, {"daily"}) + rbdrepository.backup(r.uuid) + rbdrepository.restore(r.uuid, "-") assert not os.path.exists("-") out, err = capfd.readouterr() assert "volume contents\n" == out -def test_restore_backy_extract(rbdbackup, monkeypatch, log): +def test_restore_backy_extract(rbdrepository, monkeypatch, log): check_output = mock.Mock(return_value="backy-extract 1.1.0") monkeypatch.setattr(subprocess, "check_output", check_output) - rbdbackup.restore_backy_extract = mock.Mock() + rbdrepository.restore_backy_extract = mock.Mock() source = "input-file" with open(source, "wb") as f: f.write(b"a" * CHUNK_SIZE) - r = create_rev(rbdbackup, {"daily"}) - rbdbackup.backup(r.uuid) - rbdbackup.restore(r.uuid, "restore.img") + r = create_rev(rbdrepository, {"daily"}) + rbdrepository.backup(r.uuid) + rbdrepository.restore(r.uuid, "restore.img") check_output.assert_called() - rbdbackup.restore_backy_extract.assert_called_once_with( - rbdbackup.find("0"), "restore.img" + rbdrepository.restore_backy_extract.assert_called_once_with( + rbdrepository.find("0"), "restore.img" ) -def test_backup_corrupted(rbdbackup, log): +def test_backup_corrupted(rbdrepository, log): source = "input-file" with open(source, "wb") as f: f.write(b"volume contents\n") - r = create_rev(rbdbackup, {"daily"}) - rbdbackup.backup(r.uuid) + r = create_rev(rbdrepository, {"daily"}) + rbdrepository.backup(r.uuid) - store = ChunkedFileBackend(rbdbackup.history[0], log).store + store = ChunkedFileBackend(rbdrepository.history[0], log).store chunk_path = store.chunk_path(next(iter(store.seen))) os.chmod(chunk_path, 0o664) with open(chunk_path, "wb") as f: f.write(b"invalid") - r2 = create_rev(rbdbackup, {"daily"}) - rbdbackup.backup(r2.uuid) + r2 = create_rev(rbdrepository, {"daily"}) + rbdrepository.backup(r2.uuid) - assert rbdbackup.history == [] + assert rbdrepository.history == [] assert not os.path.exists(chunk_path) diff --git 
a/src/backy/rbd/tests/test_backy-rbd.py b/src/backy/rbd/tests/test_backy-rbd.py index d9d9418f..89d4b877 100644 --- a/src/backy/rbd/tests/test_backy-rbd.py +++ b/src/backy/rbd/tests/test_backy-rbd.py @@ -3,8 +3,8 @@ import pytest -from backy.ext_deps import BACKY_CMD, BASH -from backy.rbd import RbdSource +from backy.ext_deps import BACKY_RBD_CMD, BASH +from backy.rbd import RbdRepository from backy.rbd.conftest import create_rev from backy.revision import Revision from backy.tests import Ellipsis @@ -39,74 +39,74 @@ % source1 ).encode("utf-8") ) - backup = RbdSource(backup_dir, log) + repository = RbdRepository(backup_dir, log) # Backup first state - rev1 = create_rev(backup, {"manual:test"}) - backup.backup(rev1.uuid) + rev1 = create_rev(repository, {"manual:test"}) + repository.backup(rev1.uuid) # Restore first state from the newest revision restore_target = str(tmp_path / "image1.restore") - backup.restore(rev1.uuid, restore_target) + repository.restore(rev1.uuid, restore_target) with pytest.raises(IOError): - open(backup.history[-1].filename, "wb") + open(repository.history[-1].filename, "wb") with pytest.raises(IOError): - open(backup.history[-1].info_filename, "wb") + open(repository.history[-1].info_filename, "wb") assert open(source1, "rb").read() == open(restore_target, "rb").read() # Backup second state - backup.source.filename = source2 - rev2 = create_rev(backup, {"test"}) - backup.backup(rev2.uuid) - assert len(backup.history) == 2 + repository.source.filename = source2 + rev2 = create_rev(repository, {"test"}) + repository.backup(rev2.uuid) + assert len(repository.history) == 2 # Restore second state from second backup which is the newest at position 0 - backup.restore(rev2.uuid, restore_target) + repository.restore(rev2.uuid, restore_target) d1 = open(source2, "rb").read() d2 = open(restore_target, "rb").read() assert d1 == d2 # Our original backup is now at position 1. Let's restore that again. - backup.restore(rev1.uuid, restore_target) + repository.restore(rev1.uuid, restore_target) assert open(source1, "rb").read() == open(restore_target, "rb").read() # Backup second state again - backup.source.filename = source2 - rev3 = create_rev(backup, {"manual:test"}) - backup.backup(rev3.uuid) - assert len(backup.history) == 3 + repository.source.filename = source2 + rev3 = create_rev(repository, {"manual:test"}) + repository.backup(rev3.uuid) + assert len(repository.history) == 3 # Restore image2 from its most recent at position 0 - backup.restore(rev3.uuid, restore_target) + repository.restore(rev3.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() # Restore image2 from its previous backup, now at position 1 - backup.restore(rev2.uuid, restore_target) + repository.restore(rev2.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() # Our original backup is now at position 2. Let's restore that again.
- backup.restore(rev1.uuid, restore_target) + repository.restore(rev1.uuid, restore_target) assert open(source1, "rb").read() == open(restore_target, "rb").read() # Backup third state - backup.source.filename = source3 - rev4 = create_rev(backup, {"test"}) - backup.backup(rev4.uuid) - assert len(backup.history) == 4 + repository.source.filename = source3 + rev4 = create_rev(repository, {"test"}) + repository.backup(rev4.uuid) + assert len(repository.history) == 4 # Restore image3 from the most current state - backup.restore(rev4.uuid, restore_target) + repository.restore(rev4.uuid, restore_target) assert open(source3, "rb").read() == open(restore_target, "rb").read() # Restore image2 from position 1 and 2 - backup.restore(rev3.uuid, restore_target) + repository.restore(rev3.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() - backup.restore(rev2.uuid, restore_target) + repository.restore(rev2.uuid, restore_target) assert open(source2, "rb").read() == open(restore_target, "rb").read() # Restore image1 from position 3 - backup.restore(rev1.uuid, restore_target) + repository.restore(rev1.uuid, restore_target) assert open(source1, "rb").read() == open(restore_target, "rb").read() @@ -114,7 +114,7 @@ def test_smoketest_external(): output = subprocess.check_output( [BASH, os.path.dirname(__file__) + "/smoketest.sh"], - env=os.environ | {"BACKY_CMD": BACKY_CMD}, + env=os.environ | {"BACKY_RBD_CMD": BACKY_RBD_CMD}, ) output = output.decode("utf-8") assert ( diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py index 31cb60e9..e5d68503 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -96,7 +96,7 @@ ) monkeypatch.setattr( - backy.rbd.RbdSource, + backy.rbd.RbdRepository, "backup", partialmethod(print_args, return_value=success), ) @@ -108,7 +108,7 @@ assert ( Ellipsis( """\ -(<backy.rbd.backup.RbdSource object at 0x...>, 'asdf') +(<backy.rbd.backup.RbdRepository object at 0x...>, 'asdf') {} """ ) @@ -129,17 +129,17 @@ # TODO: test call restore, verify, gc def test_call_unexpected_exception( - capsys, rbdbackup, argv, monkeypatch, log, tmp_path + capsys, rbdrepository, argv, monkeypatch, log, tmp_path ): def do_raise(*args, **kw): raise RuntimeError("test") - monkeypatch.setattr(backy.rbd.RbdSource, "gc", do_raise) + monkeypatch.setattr(backy.rbd.RbdRepository, "gc", do_raise) import os monkeypatch.setattr(os, "_exit", lambda x: None) - argv.extend(["-b", str(rbdbackup.path), "gc"]) + argv.extend(["-b", str(rbdrepository.path), "gc"]) utils.log_data = "" with pytest.raises(SystemExit): main() diff --git a/src/backy/rbd/tests/test_source.py b/src/backy/rbd/tests/test_source.py index ea638a23..dad69f1e 100644 --- a/src/backy/rbd/tests/test_source.py +++ b/src/backy/rbd/tests/test_source.py @@ -1,4 +1,4 @@ -from backy.rbd import RbdSource +from backy.rbd import RbdRepository from backy.rbd.sources.ceph.source import CephRBD @@ -17,7 +17,7 @@ image: test04 """ ) - backup = RbdSource(tmp_path, log) + backup = RbdRepository(tmp_path, log) assert isinstance(backup.source, CephRBD) assert backup.source.pool == "test" assert backup.source.image == "test04" diff --git a/src/backy/repository.py b/src/backy/repository.py index 4c4d9d6e..279f5f64 100644 --- a/src/backy/repository.py +++
b/src/backy/repository.py @@ -3,12 +3,13 @@ import re from math import ceil, floor from pathlib import Path -from typing import List, Literal, Optional, TypedDict +from typing import IO, List, Literal, Optional, TypedDict import tzlocal import yaml from structlog.stdlib import BoundLogger +from backy.source import Source from backy.utils import ( duplicates, list_get, @@ -20,8 +21,6 @@ from .revision import Revision, Trust, filter_schedule_tags from .schedule import Schedule -from backy.source import Source - class RepositoryNotEmpty(RuntimeError): @@ -54,10 +53,12 @@ class Repository(object): log: BoundLogger _by_uuid: dict[str, Revision] + _lock_fds: dict[str, IO] def __init__(self, path: Path, log: BoundLogger): self.log = log.bind(subsystem="backup") self.path = path.resolve() + self._lock_fds = {} # Load config from file try: @@ -74,9 +75,11 @@ def __init__(self, path: Path, log: BoundLogger): self.schedule = Schedule() self.schedule.configure(self.config["schedule"]) + self.scan() + @classmethod def init(self, path: Path, log: BoundLogger, source: Source): - if (path / 'config').exists(): + if (path / "config").exists(): raise RepositoryNotEmpty(self.path) if not path.exists(): @@ -84,14 +87,13 @@ def init(self, path: Path, log: BoundLogger, source: Source): source_config = source.init(path, log) - config = {'schedule': {}, 'source': source_config} + config = {"schedule": {}, "source": source_config} - with open(self.path / 'config', 'w') as f: - yaml.dump(f, config) + with open(self.path / "config", "w") as f: + yaml.dump(config, f) self.log.info(f"Initialized empty repository in {self.path}") - @property def problem_reports(self) -> list[str]: return [] @@ -146,7 +148,6 @@ def locked_function(self, *args, skip_lock=False, **kw): def name(self) -> str: return self.path.name - def to_dict(self): return self.config @@ -243,6 +244,10 @@ def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): ) raise RuntimeError("Remote revs disallowed") + @locked(target=".backup", mode="exclusive") + def run_with_backup_lock(self, fun, *args, **kw): + return fun(*args, **kw) + ################# # Making backups diff --git a/src/backy/revision.py b/src/backy/revision.py index b9648ca1..7cb06a99 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -11,7 +11,7 @@ from .utils import SafeFile if TYPE_CHECKING: - from .backup import Backup + from .repository import Repository TAG_MANUAL_PREFIX = "manual:" @@ -32,7 +32,7 @@ def filter_manual_tags(tags): class Revision(object): - backup: "Backup" + repository: "Repository" uuid: str timestamp: datetime.datetime stats: dict @@ -44,12 +44,12 @@ class Revision(object): def __init__( self, - backup: "Backup", + repository: "Repository", log: BoundLogger, uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, ) -> None: - self.backup = backup + self.repository = repository self.uuid = uuid if uuid else shortuuid.uuid() self.timestamp = timestamp if timestamp else utils.now() self.stats = {"bytes_written": 0} @@ -60,7 +60,7 @@ def __init__( @classmethod def create( cls, - backup: "Backup", + backup: "Repository", tags: set[str], log: BoundLogger, *, @@ -71,7 +71,9 @@ def create( return r @classmethod - def load(cls, file: Path, backup: "Backup", log: BoundLogger) -> "Revision": + def load( + cls, file: Path, backup: "Repository", log: BoundLogger + ) -> "Revision": with file.open(encoding="utf-8") as f: metadata = yaml.safe_load(f) r = cls.from_dict(metadata, backup, log) @@ -95,7 +97,7 @@ def from_dict(cls, metadata, 
backup, log): @property def filename(self) -> Path: """Full pathname of the image file.""" - return self.backup.path / self.uuid + return self.repository.path / self.uuid @property def info_filename(self) -> Path: @@ -154,9 +156,9 @@ def remove(self, force=False) -> None: filename.unlink() self.log.debug("remove-end", filename=filename) - if self in self.backup.history: - self.backup.history.remove(self) - del self.backup._by_uuid[self.uuid] + if self in self.repository.history: + self.repository.history.remove(self) + del self.repository._by_uuid[self.uuid] def writable(self) -> None: if self.filename.exists(): @@ -171,7 +173,7 @@ def readonly(self) -> None: def get_parent(self, ignore_trust=False) -> Optional["Revision"]: """defaults to last rev if not in history""" prev = None - for r in self.backup.history: + for r in self.repository.history: if not ignore_trust and r.trust == Trust.DISTRUSTED: continue if r.server != self.server: diff --git a/src/backy/schedule.py b/src/backy/schedule.py index 244712b5..474ff9f4 100644 --- a/src/backy/schedule.py +++ b/src/backy/schedule.py @@ -1,10 +1,13 @@ import copy import datetime from datetime import timedelta -from typing import Dict +from typing import TYPE_CHECKING, Dict, Iterable, List, Set, Tuple import backy.utils -from backy.revision import filter_schedule_tags +from backy.revision import Revision, filter_schedule_tags + +if TYPE_CHECKING: + from backy.repository import Repository MINUTE = 60 HOUR = 60 * MINUTE @@ -57,21 +60,23 @@ def __init__(self): self.schedule = {} self.config = {} - def configure(self, config): + def configure(self, config: dict) -> None: self.config = config self.schedule = copy.deepcopy(config) for tag, spec in self.schedule.items(): self.schedule[tag]["interval"] = parse_duration(spec["interval"]) - def to_dict(self): + def to_dict(self) -> dict: return self.config - def next(self, relative, spread, archive): + def next( + self, relative: datetime.datetime, spread: int, repository: "Repository" + ) -> Tuple[datetime.datetime, Set[str]]: time, tags = ideal_time, ideal_tags = self._next_ideal(relative, spread) - missed_tags = self._missed(archive) + missed_tags = self._missed(repository) # The next run will include all missed tags tags.update(missed_tags) - if missed_tags and len(archive.history): + if missed_tags and len(repository.history): # Perform an immediate backup if we have any history at all. # and when we aren't running a regular backup within the next # 5 minutes anyway. @@ -81,7 +86,9 @@ def next(self, relative, spread, archive): tags = missed_tags return time, tags - def _next_ideal(self, relative, spread): + def _next_ideal( + self, relative: datetime.datetime, spread: int + ) -> Tuple[datetime.datetime, Set[str]]: next_times: Dict[datetime.datetime, set] = {} for tag, settings in self.schedule.items(): t = next_times.setdefault( @@ -92,11 +99,11 @@ def _next_ideal(self, relative, spread): next_tags = next_times[next_time] return next_time, next_tags - def _missed(self, archive): + def _missed(self, repository: "Repository") -> Set[str]: # Check whether we missed any now = backy.utils.now() missing_tags = set(self.schedule.keys()) - for tag, last in archive.last_by_tag().items(): + for tag, last in repository.last_by_tag().items(): if tag not in self.schedule: # Ignore ad-hoc tags for catching up. 
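# (Note, not part of the patch: _missed() starts from the configured
# schedule tags, so only tags a schedule owns can ever be reported as
# missed; ad-hoc tags on a revision neither satisfy nor trigger catch-up.)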
continue @@ -105,19 +112,19 @@ def _missed(self, archive): missing_tags.remove(tag) return missing_tags - def expire(self, backup): + def expire(self, repository: "Repository") -> List["Revision"]: """Remove old revisions according to the backup schedule. Returns list of removed revisions. """ - backup.scan() + repository.scan() removed = [] # Clean out old backups: keep at least a certain number of copies # (keep) and ensure that we don't throw away copies that are newer # than keep * interval for this tag. # Phase 1: remove tags that are expired for tag, args in self.schedule.items(): - revisions = backup.find_revisions("tag:" + tag) + revisions = repository.find_revisions("tag:" + tag) keep = args["keep"] if len(revisions) < keep: continue @@ -129,7 +136,7 @@ def expire(self, backup): old_revision.write_info() # Phase 2: remove all tags which have been created by a former schedule - for revision in backup.history: + for revision in repository.history: expired_tags = ( filter_schedule_tags(revision.tags) - self.schedule.keys() ) @@ -140,7 +147,7 @@ def expire(self, backup): # Phase 3: delete revisions that have no tags any more. # We are deleting items of the history while iterating over it. # Use a copy of the list! - for revision in list(backup.history): + for revision in list(repository.history): if revision.tags: continue removed.append(revision) @@ -148,7 +155,7 @@ def expire(self, backup): return removed - def sorted_tags(self, tags): + def sorted_tags(self, tags: Iterable[str]) -> Iterable[str]: """Return a list of tags, sorted by their interval. Smallest first.""" t = {} for tag in tags: diff --git a/src/backy/source.py b/src/backy/source.py index 2cd932d2..07d21ce3 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -1,10 +1,12 @@ -from typing import Any from pathlib import Path +from typing import Any + from structlog.stdlib import BoundLogger + from backy.file import FileSource # XXX Use plugin discovery here -KNOWN_SOURCES: dict[str, "Source"] = { s.type_: s for s in [FileSource] } +KNOWN_SOURCES: dict[str, "Source"] = {s.type_: s for s in [FileSource]} class Source: @@ -14,4 +16,4 @@ class Source: @classmethod def init(cls, repository: Path, log: BoundLogger) -> dict[str, Any]: - return {'type': cls.type_} + return {"type": cls.type_} diff --git a/src/backy/tests/conftest.py b/src/backy/tests/conftest.py index b797d016..f52ab804 100644 --- a/src/backy/tests/conftest.py +++ b/src/backy/tests/conftest.py @@ -1,12 +1,12 @@ -from zoneinfo import ZoneInfo +import json import pytest -import tzlocal + +from backy.repository import Repository @pytest.fixture -def tz_berlin(monkeypatch): - """Fix time zone to gain independece from runtime environment.""" - monkeypatch.setattr( - tzlocal, "get_localzone", lambda: ZoneInfo("Europe/Berlin") - ) +def repository(schedule, tmp_path, log): + with open(str(tmp_path / "config"), "w", encoding="utf-8") as f: + json.dump({"schedule": schedule.to_dict()}, f) + return Repository(tmp_path, log) diff --git a/src/backy/tests/samples/sample1.rev b/src/backy/tests/samples/sample1.rev new file mode 100644 index 00000000..a980413f --- /dev/null +++ b/src/backy/tests/samples/sample1.rev @@ -0,0 +1,5 @@ +--- + uuid: asdf + type: full + timestamp: 2015-08-01T20:00:00+00:00 + parent: diff --git a/src/backy/tests/samples/sample2.rev b/src/backy/tests/samples/sample2.rev new file mode 100644 index 00000000..523fadcc --- /dev/null +++ b/src/backy/tests/samples/sample2.rev @@ -0,0 +1,4 @@ +--- + uuid: asdf2 + timestamp: 2015-08-01T21:00:00+00:00 + 
parent: asdf diff --git a/src/backy/tests/test_backup.py b/src/backy/tests/test_backup.py index 3a325684..a2ae1b5f 100644 --- a/src/backy/tests/test_backup.py +++ b/src/backy/tests/test_backup.py @@ -6,7 +6,7 @@ @pytest.fixture -def backup_with_revisions(backup, tmp_path): +def repository_with_revisions(repository, tmp_path): with open(str(tmp_path / "123-0.rev"), "wb") as f: f.write( b"""\ @@ -40,33 +40,33 @@ def backup_with_revisions(backup, tmp_path): tags: [daily] """ ) - backup.scan() - return backup + repository.scan() + return repository -def test_empty_revisions(backup): - assert backup.history == [] +def test_empty_revisions(repository): + assert repository.history == [] -def test_find_revision_empty(backup): +def test_find_revision_empty(repository): with pytest.raises(KeyError): - backup.find("-1") + repository.find("-1") with pytest.raises(KeyError): - backup.find("last") + repository.find("last") with pytest.raises(KeyError): - backup.find("fdasfdka") + repository.find("fdasfdka") -def test_load_revisions(backup_with_revisions): - a = backup_with_revisions +def test_load_revisions(repository_with_revisions): + a = repository_with_revisions assert [x.uuid for x in a.history] == ["123-0", "123-1", "123-2"] assert a.history[0].get_parent() is None assert a.history[1].get_parent() is None assert a.history[2].get_parent().uuid == "123-1" -def test_find_revisions(backup_with_revisions): - a = backup_with_revisions +def test_find_revisions(repository_with_revisions): + a = repository_with_revisions assert a.find_revisions("all") == a.history assert a.find_revisions("1") == [a.find("1")] assert a.find_revisions("tag:dail") == [] @@ -114,9 +114,9 @@ def test_find_revisions(backup_with_revisions): assert a.find_revisions( "first(trust:verified)..last(reverse(2015-08-30..))" ) == [ - a.find("123-0"), - a.find("123-1"), - ] + a.find("123-0"), + a.find("123-1"), + ] assert a.find_revisions("reverse(not(clean))") == [ a.find("123-2"), ] @@ -140,8 +140,8 @@ def test_find_revisions(backup_with_revisions): ] -def test_find_revisions_should_raise_invalid_spec(backup_with_revisions): - a = backup_with_revisions +def test_find_revisions_should_raise_invalid_spec(repository_with_revisions): + a = repository_with_revisions with pytest.raises(KeyError): a.find_revisions("aaaa..125") with pytest.raises(AssertionError): @@ -156,8 +156,8 @@ def test_find_revisions_should_raise_invalid_spec(backup_with_revisions): a.find_revisions("2015-09..2015-08-30") -def test_find_revision(backup_with_revisions): - a = backup_with_revisions +def test_find_revision(repository_with_revisions): + a = repository_with_revisions assert a.find("last").uuid == "123-2" with pytest.raises(KeyError): a.find("-1") @@ -175,39 +175,39 @@ def test_find_revision(backup_with_revisions): assert a.find(" first( tag:monthly ) ").uuid == "123-0" -def test_get_history(backup_with_revisions): - assert 2 == len(backup_with_revisions.clean_history) +def test_get_history(repository_with_revisions): + assert 2 == len(repository_with_revisions.clean_history) assert ( - backup_with_revisions.clean_history - == backup_with_revisions.get_history(clean=True) + repository_with_revisions.clean_history + == repository_with_revisions.get_history(clean=True) ) - assert 1 == len(backup_with_revisions.local_history) + assert 1 == len(repository_with_revisions.local_history) assert ( - backup_with_revisions.local_history - == backup_with_revisions.get_history(local=True) + repository_with_revisions.local_history + == 
repository_with_revisions.get_history(local=True) + ) + assert 1 == len( + repository_with_revisions.get_history(clean=True, local=True) ) - assert 1 == len(backup_with_revisions.get_history(clean=True, local=True)) -def test_ignore_duplicates(backup_with_revisions, tmp_path): +def test_ignore_duplicates(repository_with_revisions, tmp_path): shutil.copy(str(tmp_path / "123-2.rev"), str(tmp_path / "123-3.rev")) - a = backup_with_revisions + a = repository_with_revisions a.scan() assert 3 == len(a.history) -def test_find(simple_file_config, tmp_path, log): - backup = simple_file_config - rev = Revision.create(backup, set(), log, uuid="123-456") +def test_find(repository, tmp_path, log): + rev = Revision.create(repository, set(), log, uuid="123-456") rev.materialize() - backup.scan() - assert tmp_path / "123-456" == backup.find("0").filename + repository.scan() + assert tmp_path / "123-456" == repository.find("0").filename -def test_find_should_raise_if_not_found(simple_file_config, log): - backup = simple_file_config - rev = Revision.create(backup, set(), log) +def test_find_should_raise_if_not_found(repository, log): + rev = Revision.create(repository, set(), log) rev.materialize() - backup.scan() + repository.scan() with pytest.raises(KeyError): - backup.find("no such revision") + repository.find("no such revision") diff --git a/src/backy/tests/test_revision.py b/src/backy/tests/test_revision.py index df9dae58..5faf1a16 100644 --- a/src/backy/tests/test_revision.py +++ b/src/backy/tests/test_revision.py @@ -11,45 +11,45 @@ SAMPLE_DIR = Path(__file__).parent.joinpath("samples") -def test_revision_base(backup, log): - revision = Revision.create(backup, set(), log, uuid="uuid") +def test_revision_base(repository, log): + revision = Revision.create(repository, set(), log, uuid="uuid") assert revision.uuid == "uuid" - assert revision.backup is backup + assert revision.repository is repository -def test_revision_create(backup, log): - backup.history = [] - r = Revision.create(backup, {"1", "2"}, log) +def test_revision_create(repository, log): + repository.history = [] + r = Revision.create(repository, {"1", "2"}, log) assert r.uuid is not None assert r.tags == {"1", "2"} assert (backy.utils.now() - r.timestamp).total_seconds() < 10 - assert r.backup is backup + assert r.repository is repository -def test_revision_create_child(backup, log): - backup.history = [Revision.create(backup, set(), log, uuid="asdf")] - r = Revision.create(backup, {"test"}, log) +def test_revision_create_child(repository, log): + repository.history = [Revision.create(repository, set(), log, uuid="asdf")] + r = Revision.create(repository, {"test"}, log) assert r.uuid is not None assert r.tags == {"test"} assert r.get_parent().uuid == "asdf" assert (backy.utils.now() - r.timestamp).total_seconds() < 10 - assert r.backup is backup + assert r.repository is repository -def test_load_sample1(backup, log): - r = Revision.load(SAMPLE_DIR / "sample1.rev", backup, log) +def test_load_sample1(repository, log): + r = Revision.load(SAMPLE_DIR / "sample1.rev", repository, log) assert r.uuid == "asdf" assert r.timestamp == datetime.datetime(2015, 8, 1, 20, 0, tzinfo=UTC) assert r.get_parent() is None - assert r.backup is backup + assert r.repository is repository -def test_load_sample2(backup, log): - r = Revision.load(SAMPLE_DIR / "sample2.rev", backup, log) +def test_load_sample2(repository, log): + r = Revision.load(SAMPLE_DIR / "sample2.rev", repository, log) assert r.uuid == "asdf2" assert r.timestamp == datetime.datetime(2015, 8, 
1, 21, 0, tzinfo=UTC) assert r.get_parent() is None - assert r.backup is backup + assert r.repository is repository def test_filenames_based_on_uuid_and_backup_dir(log): @@ -60,9 +60,9 @@ def test_filenames_based_on_uuid_and_backup_dir(log): assert r.info_filename == Path("/srv/backup/foo/asdf.rev") -def test_store_revision_data(backup, clock, log): - backup.history = [Revision.create(backup, set(), log, uuid="asdf")] - r = Revision.create(backup, set(), log, uuid="asdf2") +def test_store_revision_data(repository, clock, log): + repository.history = [Revision.create(repository, set(), log, uuid="asdf")] + r = Revision.create(repository, set(), log, uuid="asdf2") r.write_info() with open(r.info_filename, encoding="utf-8") as info: assert yaml.safe_load(info) == { @@ -77,8 +77,8 @@ def test_store_revision_data(backup, clock, log): } -def test_store_revision_data_no_parent(backup, clock, log): - r = Revision.create(backup, set(), log, uuid="asdf2") +def test_store_revision_data_no_parent(repository, clock, log): + r = Revision.create(repository, set(), log, uuid="asdf2") r.write_info() with open(r.info_filename, encoding="utf-8") as info: assert yaml.safe_load(info) == { @@ -93,15 +93,15 @@ def test_store_revision_data_no_parent(backup, clock, log): } -def test_delete_revision(backup, log): - r = Revision.create(backup, set(), log, uuid="123-456") +def test_delete_revision(repository, log): + r = Revision.create(repository, set(), log, uuid="123-456") r.materialize() - assert backup.path.joinpath("123-456.rev").exists() - backup.scan() - backup.path.joinpath("123-456").open("w") - assert backup.path.joinpath("123-456.rev").exists() + assert repository.path.joinpath("123-456.rev").exists() + repository.scan() + repository.path.joinpath("123-456").open("w") + assert repository.path.joinpath("123-456.rev").exists() r.remove() # Ensure the revision data file exists - we do not implicitly create # it any longer. 
- assert not backup.path.joinpath("123-456").exists() - assert not backup.path.joinpath("123-456.rev").exists() + assert not repository.path.joinpath("123-456").exists() + assert not repository.path.joinpath("123-456.rev").exists() diff --git a/src/backy/tests/test_schedule.py b/src/backy/tests/test_schedule.py index 710e8b58..8ee3a093 100644 --- a/src/backy/tests/test_schedule.py +++ b/src/backy/tests/test_schedule.py @@ -21,7 +21,7 @@ def test_parse_duration(): def test_first_backup_catches_up_all_tags_immediately_in_next_interval( - schedule, backup, clock + schedule, repository, clock ): schedule.configure( { @@ -32,17 +32,17 @@ def test_first_backup_catches_up_all_tags_immediately_in_next_interval( assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily", "test"}, - ) == schedule.next(backy.utils.now(), 1, backup) + ) == schedule.next(backy.utils.now(), 1, repository) -def test_tag_first_interval_after_now(schedule, backup, clock): +def test_tag_first_interval_after_now(schedule, repository, clock): assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily"}, ) == schedule._next_ideal(backy.utils.now(), 1) -def test_tag_second_interval_after_now(schedule, backup, clock): +def test_tag_second_interval_after_now(schedule, repository, clock): assert ( datetime(2015, 9, 3, 0, 0, 5, tzinfo=UTC), {"daily"}, @@ -51,7 +51,7 @@ def test_tag_second_interval_after_now(schedule, backup, clock): ) -def test_tag_second_interval_with_different_spread(schedule, backup, clock): +def test_tag_second_interval_with_different_spread(schedule, repository, clock): assert ( datetime(2015, 9, 3, 0, 0, 5, tzinfo=UTC), {"daily"}, @@ -60,34 +60,34 @@ def test_tag_second_interval_with_different_spread(schedule, backup, clock): ) -def test_tag_catchup_not_needed_for_recent(schedule, backup, clock): +def test_tag_catchup_not_needed_for_recent(schedule, repository, clock): # A recent backup does not cause catchup to be triggered. revision = mock.Mock() revision.timestamp = clock.now() - timedelta(seconds=15) revision.tags = {"daily"} revision.stats = {"duration": 10} - backup.history.append(revision) - assert set() == schedule._missed(backup) + repository.history.append(revision) + assert set() == schedule._missed(repository) # This in turn causes the main next() function to return the regular next # interval. assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily"}, - ) == schedule.next(clock.now(), 1, backup) + ) == schedule.next(clock.now(), 1, repository) def test_tag_catchup_does_not_stumble_on_adhoc_tags_in_backup( - schedule, backup, clock + schedule, repository, clock ): revision = mock.Mock() revision.timestamp = clock.now() - timedelta(seconds=15) revision.tags = {"test"} revision.stats = {"duration": 10} - backup.history.append(revision) - assert {"daily"} == schedule._missed(backup) + repository.history.append(revision) + assert {"daily"} == schedule._missed(repository) -def test_tag_catchup_until_5_minutes_before_next(schedule, backup, clock): +def test_tag_catchup_until_5_minutes_before_next(schedule, repository, clock): # If a backup has been overdue for too long, we expect the # tag to be scheduled soon anyway and we do not catch up to avoid # overload issues. 
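The hunks above and below exercise the catch-up rule in Schedule.next(). A minimal sketch of that rule, reconstructed from the hunk context shown in this patch; the standalone helper next_run and the exact 5-minute grace value are assumptions, not the patch's literal code:

    import datetime

    import backy.utils

    GRACE = datetime.timedelta(minutes=5)  # assumed width of the "5 minutes before next" window

    def next_run(schedule, relative, spread, repository):
        # Ideal slot first; the next run always includes all missed tags.
        time, tags = schedule._next_ideal(relative, spread)
        missed = schedule._missed(repository)
        tags.update(missed)
        if missed and len(repository.history):
            # Catch up with an immediate backup, but only when the regular
            # run is not imminent anyway (more than GRACE away).
            if time > backy.utils.now() + GRACE:
                time = backy.utils.now()
                tags = missed
        return time, tags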
@@ -96,29 +96,31 @@ def test_tag_catchup_until_5_minutes_before_next(schedule, backup, clock): revision.tags = {"daily"} revision.stats = {"duration": 10} revision.write_info() - backup.history.append(revision) - assert {"daily"} == schedule._missed(backup) + repository.history.append(revision) + assert {"daily"} == schedule._missed(repository) # This in turn causes the main next() function to return the regular next # interval. assert ( datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), {"daily"}, - ) == schedule.next(clock.now(), 1, backup) + ) == schedule.next(clock.now(), 1, repository) # As we approach the 5 minute mark before the next regular interval, # we then flip towards the ideal time. clock.now.return_value = datetime(2015, 9, 1, 23, 55, 0, tzinfo=UTC) assert (clock.now(), {"daily"}) == schedule.next( - datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, backup + datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, repository ) clock.now.return_value = datetime(2015, 9, 1, 23, 55, 1, tzinfo=UTC) assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily"}, - ) == schedule.next(datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, backup) + ) == schedule.next( + datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, repository + ) -def test_tag_catchup_needed_for_recently_missed(backup, clock): +def test_tag_catchup_needed_for_recently_missed(repository, clock): revision = mock.Mock() schedule = backy.schedule.Schedule() @@ -137,59 +139,59 @@ def test_tag_catchup_needed_for_recently_missed(backup, clock): revision.timestamp = clock.now() - timedelta(seconds=(24 * 60 * 60) * 1.2) revision.tags = {"daily"} revision.stats = {"duration": 10} - backup.history.append(revision) + repository.history.append(revision) - assert {"daily", "weekly", "hourly"} == schedule._missed(backup) + assert {"daily", "weekly", "hourly"} == schedule._missed(repository) # This in turn causes the main next() function to also # return this date. 
assert ( datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), {"daily", "weekly", "hourly"}, - ) == schedule.next(clock.now(), 1, backup) + ) == schedule.next(clock.now(), 1, repository) def test_do_not_expire_if_less_than_keep_and_inside_keep_interval( - schedule, backup, clock, log + schedule, repository, clock, log ): def add_revision(timestamp): - revision = Revision.create(backup, {"daily"}, log) - revision.uuid = str(len(backup.history) + 1) + revision = Revision.create(repository, {"daily"}, log) + revision.uuid = str(len(repository.history) + 1) revision.timestamp = timestamp revision.materialize() - backup.history.append(revision) - backup.history.sort(key=lambda x: x.timestamp) + repository.history.append(revision) + repository.history.sort(key=lambda x: x.timestamp) return revision clock.now.return_value = datetime(2014, 5, 10, 10, 0, tzinfo=UTC) add_revision(datetime(2014, 5, 10, 10, 0, tzinfo=UTC)) - assert [] == schedule.expire(backup) - backup.scan() - assert len(backup.history) == 1 - assert backup.history[0].tags == {"daily"} + assert [] == schedule.expire(repository) + repository.scan() + assert len(repository.history) == 1 + assert repository.history[0].tags == {"daily"} add_revision(datetime(2014, 5, 9, 10, 0, tzinfo=UTC)) add_revision(datetime(2014, 5, 8, 10, 0, tzinfo=UTC)) add_revision(datetime(2014, 5, 7, 10, 0, tzinfo=UTC)) add_revision(datetime(2014, 5, 6, 10, 0, tzinfo=UTC)) - assert [] == schedule.expire(backup) - backup.scan() - assert len(backup.history) == 5 - assert [{"daily"}] * 5 == [r.tags for r in backup.history] + assert [] == schedule.expire(repository) + repository.scan() + assert len(repository.history) == 5 + assert [{"daily"}] * 5 == [r.tags for r in repository.history] # This is the one revision more than the basic 'keep' parameter # but its still within the keep*interval frame so we keep it. add_revision(datetime(2014, 5, 6, 11, 0, tzinfo=UTC)) - assert [] == schedule.expire(backup) - assert [{"daily"}] * 6 == [r.tags for r in backup.history] + assert [] == schedule.expire(repository) + assert [{"daily"}] * 6 == [r.tags for r in repository.history] # This revision is more than keep and also outside the interval. # It gets its tag removed and disappears. r = add_revision(datetime(2014, 5, 4, 11, 0, tzinfo=UTC)) assert r.filename.with_suffix(".rev").exists() - removed = [x for x in schedule.expire(backup)] + removed = [x for x in schedule.expire(repository)] assert [r.uuid] == [x.uuid for x in removed] - backup.scan() - assert [{"daily"}] * 6 == [rev.tags for rev in backup.history] + repository.scan() + assert [{"daily"}] * 6 == [rev.tags for rev in repository.history] assert not r.filename.with_suffix(".rev").exists() # If we have manual tags, then those do not expire. 
However, the @@ -199,11 +201,11 @@ def add_revision(timestamp): r.tags = {"daily", "manual:test", "unknown"} r.write_info() assert r.filename.with_suffix(".rev").exists() - expired = schedule.expire(backup) + expired = schedule.expire(repository) assert [] == [x.uuid for x in expired] - backup.scan() + repository.scan() assert [{"manual:test"}] + [{"daily"}] * 6 == [ - rev.tags for rev in backup.history + rev.tags for rev in repository.history ] assert r.filename.with_suffix(".rev").exists() diff --git a/src/backy/tests/test_utils.py b/src/backy/tests/test_utils.py index f0858338..14c2b1ef 100644 --- a/src/backy/tests/test_utils.py +++ b/src/backy/tests/test_utils.py @@ -9,13 +9,13 @@ from backy.tests import Ellipsis from backy.utils import ( SafeFile, + TimeOut, + TimeOutError, + _fake_fallocate, copy_overwrite, files_are_equal, files_are_roughly_equal, punch_hole, - TimeOut, - TimeOutError, - _fake_fallocate, ) @@ -343,7 +343,6 @@ def test_unmocked_now_returns_time_time_float(): assert before <= now <= after - @pytest.fixture def testfile(tmp_path): fn = str(tmp_path / "myfile") @@ -383,6 +382,7 @@ def test_fake_fallocate_only_punches_holes(testfile): with open(testfile, "r+b") as f: _fake_fallocate(f, 0, 0, 10) + def test_timeout(capsys): timeout = TimeOut(0.05, 0.01) while timeout.tick(): diff --git a/src/backy/utils.py b/src/backy/utils.py index f032c8ac..97452a8b 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -15,12 +15,12 @@ import typing from asyncio import Event from typing import IO, Callable, Iterable, List, Literal, Optional, TypeVar +from zoneinfo import ZoneInfo import aiofiles.os as aos import humanize import structlog import tzlocal -from zoneinfo import ZoneInfo from .ext_deps import CP @@ -473,9 +473,7 @@ async def has_recent_changes(path: str, reference_time: float) -> bool: return False -async def delay_or_event( - delay: float, event: Event -) -> Optional[Literal[True]]: +async def delay_or_event(delay: float, event: Event) -> Optional[Literal[True]]: return await next( asyncio.as_completed([asyncio.sleep(delay), event.wait()]) ) From 5d64d27a1265d4044cbb6866a8f3eba44229ad26 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Wed, 26 Jun 2024 16:02:25 +0200 Subject: [PATCH 11/25] snapshot: make backup source types pluggable and provide an init cmd --- pyproject.toml | 5 +++++ src/backy/cli/__init__.py | 14 +++++++------- src/backy/file/__init__.py | 9 ++------- src/backy/repository.py | 10 +++++----- src/backy/source.py | 7 ++++--- 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ff87633a..6043ac54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,11 @@ zest-releaser = "^9.1.1" requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" +[tool.poetry.plugins.'backy.sources'] +file = 'backy.file:FileSource' +rbd = 'backy.rbd:RBDSource' +s3 = 'backy.s3:S3Source' + [tool.poetry.scripts] backy = "backy.cli:main" backyd = "backy.daemon:main" diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index 1ab74ee5..adc4d6fd 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -21,7 +21,7 @@ from backy import logging # XXX invert this dependency -from backy.rbd.backup import RbdRepository, RestoreBackend +from backy.rbd.backup import RestoreBackend from backy.repository import Repository from backy.utils import format_datetime_local, generate_taskid @@ -47,7 +47,7 @@ def __call__(self, cmdname: str, args: dict[str, Any]): return ret def init(self, 
type): - source = backy.source.KNOWN_SOURCES[type] + source = backy.source.factory_by_type(type) Repository.init(self.path, self.log, source=source) def status(self, yaml_: bool, revision: str) -> None: @@ -343,15 +343,15 @@ def main(): "working directory." ), ) - p.add_argument( - "type", - choices=list(backy.source.KNOWN_SOURCES), - help="Type of the source.", - ) subparsers = parser.add_subparsers() p = subparsers.add_parser("init", help="Create an empty backy repository.") + p.add_argument( + "type", + choices=backy.source.SOURCE_PLUGINS.names, + help="Type of the source.", + ) p.set_defaults(func="init") p = subparsers.add_parser("jobs", help="List status of all known jobs") diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index 3fadefd4..8f2810f2 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -3,7 +3,6 @@ import os import sys from pathlib import Path -from typing import TYPE_CHECKING import structlog from structlog.stdlib import BoundLogger @@ -11,17 +10,13 @@ from backy.utils import generate_taskid from .. import logging +from backy.source import Source -if TYPE_CHECKING: - from backy.repository import Repository -class FileSource: +class FileSource(Source): type_ = "file" - @classmethod - def init(cls, repository: "Repository", log: BoundLogger): - return {"type": cls.type_} def main(): diff --git a/src/backy/repository.py b/src/backy/repository.py index 279f5f64..f3ae1ff9 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -78,9 +78,9 @@ def __init__(self, path: Path, log: BoundLogger): self.scan() @classmethod - def init(self, path: Path, log: BoundLogger, source: Source): - if (path / "config").exists(): - raise RepositoryNotEmpty(self.path) + def init(cls, path: Path, log: BoundLogger, source: Source): + if (path / 'config').exists(): + raise RepositoryNotEmpty(path) if not path.exists(): path.mkdir(parents=True, exist_ok=True) @@ -89,10 +89,10 @@ def init(self, path: Path, log: BoundLogger, source: Source): config = {"schedule": {}, "source": source_config} - with open(self.path / "config", "w") as f: + with open(path / 'config', 'w') as f: yaml.dump(config, f) - self.log.info(f"Initialized empty repository in {self.path}") + log.info(f"Initialized empty repository in {path}") @property def problem_reports(self) -> list[str]: diff --git a/src/backy/source.py b/src/backy/source.py index 07d21ce3..75a277f9 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -2,11 +2,12 @@ from typing import Any from structlog.stdlib import BoundLogger +from importlib.metadata import entry_points -from backy.file import FileSource +SOURCE_PLUGINS = entry_points(group='backy.sources') -# XXX Use plugin discovery here -KNOWN_SOURCES: dict[str, "Source"] = {s.type_: s for s in [FileSource]} +def factory_by_type(type_): + return SOURCE_PLUGINS[type_].load() class Source: From d1888eb98aa5a7753c2a5f7e57458e8d20726df3 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Wed, 26 Jun 2024 16:06:29 +0200 Subject: [PATCH 12/25] note potential new command names --- src/backy/cli/__init__.py | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index adc4d6fd..a1627237 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -26,6 +26,61 @@ from backy.utils import format_datetime_local, generate_taskid + + # single repo commands + + + + + # (init) + + # rev-parse Print full path or uuid of specified revisions + + # log 
[--filter] (status) Show backup status. Show inventory and summary information + + # backup [--fg] Perform a backup + # restore Restore (a given revision) to a given target + + # distrust Distrust specified revisions + # verify Verify specified revisions + # rm Forget specified revision + # tag Modify tags on revision + + # gc [--expire] [--remote|--local] (Expire revisions) and collect garbage from the repository. + + # pull? update metadata from all known remotes that host backups + # for the same backup source + + # check + + + + + + + + # # multi-repo / daemon-based commands + + # show jobs List status of all known jobs + # show daemon Daemon status + + # pull + + # backup --all [--include=filter] [--exclude=filter] [--fg] + + # check --all + + + + + + + + + + + + class Command(object): """Proxy between CLI calls and actual backup code.""" From 5883f943fc97c5e5c0a00e3e0ae5b5a5c68d77d9 Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Thu, 27 Jun 2024 00:47:37 +0200 Subject: [PATCH 13/25] snapshot: some work on cli --- src/backy/cli/__init__.py | 233 ++++++++++++++------------ src/backy/daemon/scheduler.py | 38 +++-- src/backy/daemon/tests/test_api.py | 15 +- src/backy/daemon/tests/test_daemon.py | 12 +- src/backy/ext_deps.py | 4 + src/backy/file/__init__.py | 4 +- src/backy/rbd/__init__.py | 2 +- src/backy/rbd/backup.py | 3 +- src/backy/rbd/tests/test_main.py | 6 +- src/backy/repository.py | 35 ++-- src/backy/revision.py | 4 +- src/backy/source.py | 5 +- 12 files changed, 200 insertions(+), 161 deletions(-) diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index a1627237..aee60704 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -2,6 +2,7 @@ import errno import os import re +import subprocess import sys from pathlib import Path from typing import Any, Dict, Literal, Optional @@ -23,62 +24,47 @@ # XXX invert this dependency from backy.rbd.backup import RestoreBackend from backy.repository import Repository +from backy.revision import Revision from backy.utils import format_datetime_local, generate_taskid +# single repo commands - # single repo commands +# (init) +# rev-parse Print full path or uuid of specified revisions +# log [--filter] (status) Show backup status. Show inventory and summary information +# backup [--fg] Perform a backup +# restore Restore (a given revision) to a given target - # (init) +# distrust Distrust specified revisions +# verify Verify specified revisions +# rm Forget specified revision +# tag Modify tags on revision - # rev-parse Print full path or uuid of specified revisions - - # log [--filter] (status) Show backup status. Show inventory and summary information - - # backup [--fg] Perform a backup - # restore Restore (a given revision) to a given target - - # distrust Distrust specified revisions - # verify Verify specified revisions - # rm Forget specified revision - # tag Modify tags on revision - - # gc [--expire] [--remote|--local] (Expire revisions) and collect garbage from the repository. - - # pull? update metadata from all known remotes that host backups - # for the same backup source - - # check - - - - - - - - # # multi-repo / daemon-based commands - - # show jobs List status of all known jobs - # show daemon Daemon status - - # pull - - # backup --all [--include=filter] [--exclude=filter] [--fg] - - # check --all +# gc [--expire] [--remote|--local] (Expire revisions) and collect garbage from the repository. +# pull? 
update metadata from all known remotes that host backups +# for the same backup source +# check +# # multi-repo / daemon-based commands +# show-jobs List status of all known jobs +# show-daemon Daemon status +# pull +# backup --all [--include=filter] [--exclude=filter] [--fg] +# check --all +# maybe add a common --repo/--job flag? class Command(object): @@ -105,7 +91,15 @@ def init(self, type): source = backy.source.factory_by_type(type) Repository.init(self.path, self.log, source=source) - def status(self, yaml_: bool, revision: str) -> None: + def rev_parse(self, revision: str, uuid: bool) -> None: + b = Repository(self.path, self.log) + for r in b.find_revisions(revision): + if uuid: + print(r.uuid) + else: + print(r.filename) + + def log_(self, yaml_: bool, revision: str) -> None: revs = Repository(self.path, self.log).find_revisions(revision) if yaml_: print(yaml.safe_dump([r.to_dict() for r in revs])) @@ -168,57 +162,72 @@ def status(self, yaml_: bool, revision: str) -> None: ) def backup(self, tags: str, force: bool) -> int: - b = RbdRepository(self.path, self.log) + b = Repository(self.path, self.log) b._clean() - try: - tags_ = set(t.strip() for t in tags.split(",")) - success = b.backup(tags_, force) - return int(not success) - except IOError as e: - if e.errno not in [errno.EDEADLK, errno.EAGAIN]: - raise - self.log.warning("backup-already-running") - return 1 - finally: - b._clean() + tags_ = set(t.strip() for t in tags.split(",")) + if not force: + b.validate_tags(tags_) + r = Revision.create(b, tags_, self.log) + r.materialize() + proc = subprocess.run( + [ + b.type.value, + "-t", + self.taskid, + "-b", + str(self.path), + "backup", + r.uuid, + ], + ) + b.scan() + b._clean() + return proc.returncode def restore( self, revision: str, target: str, restore_backend: RestoreBackend ) -> None: - b = RbdRepository(self.path, self.log) - b.restore(revision, target, restore_backend) - - def find(self, revision: str, uuid: bool) -> None: - b = RbdRepository(self.path, self.log) - for r in b.find_revisions(revision): - if uuid: - print(r.uuid) - else: - print(r.filename) - - def forget(self, revision: str) -> None: - b = RbdRepository(self.path, self.log) - b.forget(revision) - b.warn_pending_changes() - - def scheduler(self, config: Path) -> None: - backy.daemon.main(config, self.log) - - def purge(self) -> None: - b = RbdRepository(self.path, self.log) - b.purge() - - def upgrade(self) -> None: - b = RbdRepository(self.path, self.log) - b.upgrade() + b = Repository(self.path, self.log) + r = b.find(revision) + proc = subprocess.run( + [ + b.type.value, + "-t", + self.taskid, + "-b", + str(self.path), + "restore", + "--backend", + restore_backend.value, + r.uuid, + target, + ] + ) + return proc.returncode def distrust(self, revision: str) -> None: - b = RbdRepository(self.path, self.log) + b = Repository(self.path, self.log) b.distrust(revision) def verify(self, revision: str) -> None: - b = RbdRepository(self.path, self.log) - b.verify(revision) + b = Repository(self.path, self.log) + r = b.find(revision) + proc = subprocess.run( + [ + b.type.value, + "-t", + self.taskid, + "-b", + str(self.path), + "verify", + r.uuid, + ] + ) + return proc.returncode + + def rm(self, revision: str) -> None: + b = Repository(self.path, self.log) + b.forget(revision) def tags( self, @@ -243,14 +252,31 @@ def tags( autoremove=autoremove, force=force, ) - b.warn_pending_changes() return int(not success) - def expire(self) -> None: + def gc(self, expire: bool) -> None: # XXX needs to update from remote 
API peers first (pull) - b = backy.repository.Repository(self.path, self.log) - b.expire() - b.warn_pending_changes() + b = Repository(self.path, self.log) + if expire: + b.expire() + proc = subprocess.run( + [ + b.type.value, + "-t", + self.taskid, + "-b", + str(self.path), + "gc", + ] + ) + # if remote: + # push + + def pull(self): + pass + + def check(self): + pass def jobs(self, filter_re=""): """List status of all known jobs. Optionally filter by regex.""" @@ -409,7 +435,7 @@ def main(): ) p.set_defaults(func="init") - p = subparsers.add_parser("jobs", help="List status of all known jobs") + p = subparsers.add_parser("show-jobs", help="List status of all known jobs") p.add_argument( "filter_re", default="", @@ -417,19 +443,10 @@ def main(): nargs="?", help="Optional job filter regex", ) - p.set_defaults(func="jobs") + p.set_defaults(func="show-jobs") - p = subparsers.add_parser("status", help="Show job status overview") - p.set_defaults(func="status") - - p = subparsers.add_parser("run", help="Trigger immediate run for one job") - p.add_argument("job", metavar="", help="Name of the job to run") - p.set_defaults(func="run") - - p = subparsers.add_parser( - "runall", help="Trigger immediate run for all jobs" - ) - p.set_defaults(func="runall") + p = subparsers.add_parser("show-daemon", help="Show job status overview") + p.set_defaults(func="show-daemon") p = subparsers.add_parser( "check", @@ -474,13 +491,18 @@ def main(): p.set_defaults(func="restore") p = subparsers.add_parser( - "purge", + "gc", help="Purge the backup store (i.e. chunked) from unused data", ) - p.set_defaults(func="purge") + p.add_argument( + "--expire", + action="store_true", + help="Expire tags according to schedule", + ) + p.set_defaults(func="gc") p = subparsers.add_parser( - "find", + "rev-parse", help="Print full path or uuid of specified revisions", ) p.add_argument( @@ -495,10 +517,10 @@ def main(): default="latest", help="use revision SPEC to find (default: %(default)s)", ) - p.set_defaults(func="find") + p.set_defaults(func="rev_parse") p = subparsers.add_parser( - "status", + "log", help="Show backup status. 
Show inventory and summary information", ) p.add_argument("--yaml", dest="yaml_", action="store_true") @@ -509,7 +531,7 @@ def main(): default="all", help="use revision SPEC as filter (default: %(default)s)", ) - p.set_defaults(func="status") + p.set_defaults(func="log_") # DISTRUST p = subparsers.add_parser( @@ -587,11 +609,6 @@ def main(): ) p.set_defaults(func="tags") - p = subparsers.add_parser( - "expire", - help="Expire tags according to schedule", - ) - p.set_defaults(func="expire") args = parser.parse_args() if not hasattr(args, "func"): diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index bba31e01..0d0bb996 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -291,13 +291,9 @@ async def run_forever(self) -> None: self.repository._clean() await self.run_backup(next_tags) self.repository.scan() - await self.repository.run_with_backup_lock( - self.pull_metadata, self.daemon.peers, self.taskid - ) + await self.pull_metadata() await self.run_expiry() - await self.repository.run_with_backup_lock( - self.push_metadata, self.daemon.peers, self.taskid - ) + await self.push_metadata() await self.run_gc() await self.run_callback() except asyncio.CancelledError: @@ -466,7 +462,10 @@ def stop(self) -> None: self._task = None self.update_status("") - async def push_metadata(self, peers, taskid: str) -> int: + async def push_metadata(self) -> int: + return await self.repository.run_with_backup_lock(self._push_metadata) + + async def _push_metadata(self) -> int: grouped = defaultdict(list) for r in self.repository.clean_history: if r.pending_changes: @@ -474,17 +473,21 @@ async def push_metadata(self, peers, taskid: str) -> int: self.log.info( "push-start", changes=sum(len(L) for L in grouped.values()) ) - async with ClientManager(peers, taskid, self.log) as apis: + async with ClientManager( + self.daemon.peers, self.taskid, self.log + ) as apis: errors = await asyncio.gather( *[ - self._push_metadata(apis[server], grouped[server]) + self._push_metadata_single(apis[server], grouped[server]) for server in apis ] ) self.log.info("push-end", errors=sum(errors)) return sum(errors) - async def _push_metadata(self, api: Client, revs: List[Revision]) -> bool: + async def _push_metadata_single( + self, api: Client, revs: List[Revision] + ) -> bool: purge_required = False error = False for r in revs: @@ -531,10 +534,13 @@ async def _push_metadata(self, api: Client, revs: List[Revision]) -> bool: error = True return error - async def pull_metadata(self, peers: dict, taskid: str) -> int: + async def pull_metadata(self) -> int: + return await self.repository.run_with_backup_lock(self._pull_metadata) + + async def _pull_metadata(self) -> int: async def remove_dead_peer(): for r in list(self.repository.history): - if r.server and r.server not in peers: + if r.server and r.server not in self.daemon.peers: self.log.info( "pull-removing-dead-peer", rev_uuid=r.uuid, @@ -544,15 +550,17 @@ async def remove_dead_peer(): return False self.log.info("pull-start") - async with ClientManager(peers, taskid, self.log) as apis: + async with ClientManager( + self.daemon.peers, self.taskid, self.log + ) as apis: errors = await asyncio.gather( remove_dead_peer(), - *[self._pull_metadata(apis[server]) for server in apis], + *[self._pull_metadata_single(apis[server]) for server in apis], ) self.log.info("pull-end", errors=sum(errors)) return sum(errors) - async def _pull_metadata(self, api: Client) -> bool: + async def _pull_metadata_single(self, api: Client) -> bool: error 
= False log = self.log.bind(server=api.server_name) try: diff --git a/src/backy/daemon/tests/test_api.py b/src/backy/daemon/tests/test_api.py index b0796862..add76c82 100644 --- a/src/backy/daemon/tests/test_api.py +++ b/src/backy/daemon/tests/test_api.py @@ -235,7 +235,6 @@ async def test_simple_sync(daemons, log): new_rev1 = b0.history[1] assert new_rev1.repository == b0 assert new_rev1.timestamp == rev1.timestamp - assert new_rev1.backend_type == "" assert new_rev1.stats == rev1.stats assert new_rev1.tags == rev1.tags assert new_rev1.orig_tags == rev1.tags @@ -413,7 +412,7 @@ async def delay_or_event(delay, event): for job, start_delay in zip(jobs, start_delays): monkeypatch.setattr(job, "run_expiry", null_coroutine) - monkeypatch.setattr(job, "run_purge", null_coroutine) + monkeypatch.setattr(job, "run_gc", null_coroutine) monkeypatch.setattr(job, "run_callback", null_coroutine) monkeypatch.setattr(job, "run_backup", partial(run_backup, job)) monkeypatch.setattr(job, "pull_metadata", null_coroutine) @@ -483,7 +482,7 @@ async def test_wait_for_leader_parallel(jobs_dry_run): ... AAAA I test01[A4WN] job/leader-found [server-0] leader=None leader_revs=1 ... AAAA D test01[A4WN] job/updating-status [server-0] status='waiting for worker slot (fast)' ... AAAA D test01[A4WN] job/updating-status [server-0] status='running (fast)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... ... AAAA I test01[N6PW] job/leader-finished [server-1] leader='server-0' ... AAAA D test01[N6PW] job/updating-status [server-1] status='finished' @@ -534,7 +533,7 @@ async def test_wait_for_leader_delayed(jobs_dry_run): ... AAAA I test01[N6PW] job/leader-not-scheduled [server-1] leader='server-0' ... AAAA D test01[N6PW] job/updating-status [server-1] status='waiting for worker slot (slow)' ... AAAA D test01[N6PW] job/updating-status [server-1] status='running (slow)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[N6PW] job/updating-status [server-1] status='finished' ... """ @@ -598,12 +597,12 @@ async def crash(*args, **kw): ... AAAA I test01[A4WN] job/leader-found [server-0] leader=None leader_revs=1 ... AAAA D test01[A4WN] job/updating-status [server-0] status='waiting for worker slot (fast)' ... AAAA D test01[A4WN] job/updating-status [server-0] status='running (fast)' -... AAAA I daemon/api-reconfigure [server-0] \n\ +... AAAA I - daemon/api-reconfigure [server-0] \n\ ... ... AAAA W test01[N6PW] job/leader-failed [server-1] exception_class='aiohttp.client_exceptions.ClientResponseError' exception_msg="401, message='Unauthorized', url=URL('...')" leader='server-0' ... AAAA D test01[N6PW] job/updating-status [server-1] status='waiting for worker slot (slow)' ... AAAA D test01[N6PW] job/updating-status [server-1] status='running (slow)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[A4WN] job/updating-status [server-0] status='finished' ... ... AAAA D test01[N6PW] job/updating-status [server-1] status='finished' @@ -646,7 +645,7 @@ async def test_wait_for_leader_stopped(jobs_dry_run): ... AAAA I test01[A4WN] job/leader-stopped [server-1] leader='server-0' ... AAAA D test01[A4WN] job/updating-status [server-1] status='waiting for worker slot (slow)' ... 
AAAA D test01[A4WN] job/updating-status [server-1] status='running (slow)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[A4WN] job/updating-status [server-1] status='finished' ... """ @@ -703,7 +702,7 @@ async def noop(*args, **kw): ... AAAA I test01[A4WN] job/leader-found [server-0] leader=None leader_revs=0 ... AAAA D test01[A4WN] job/updating-status [server-0] status='waiting for worker slot (slow)' ... AAAA D test01[A4WN] job/updating-status [server-0] status='running (slow)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[A4WN] job/updating-status [server-0] status='finished' ... """ diff --git a/src/backy/daemon/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py index 031b8b67..1ca0d7e0 100644 --- a/src/backy/daemon/tests/test_daemon.py +++ b/src/backy/daemon/tests/test_daemon.py @@ -291,7 +291,7 @@ async def test_task_generator(daemon, clock, tmp_path, monkeypatch, tz_berlin): await cancel_and_wait(j) job = daemon.jobs["test01"] - async def null_coroutine(): + async def null_coroutine(*args, **kw): return monkeypatch.setattr(job, "_wait_for_deadline", null_coroutine) @@ -319,7 +319,7 @@ async def test_task_generator_backoff( await cancel_and_wait(j) job = daemon.jobs["test01"] - async def null_coroutine(): + async def null_coroutine(*args, **kw): await asyncio.sleep(0.1) async def false_coroutine(*args, **kw): @@ -339,7 +339,7 @@ async def failing_coroutine(*args, **kw): monkeypatch.setattr(job, "_wait_for_deadline", null_coroutine) monkeypatch.setattr(job, "run_expiry", null_coroutine) - monkeypatch.setattr(job, "run_purge", null_coroutine) + monkeypatch.setattr(job, "run_gc", null_coroutine) monkeypatch.setattr(job, "run_callback", null_coroutine) monkeypatch.setattr(job, "run_backup", failing_coroutine) monkeypatch.setattr(job, "pull_metadata", null_coroutine) @@ -421,8 +421,8 @@ def test_daemon_status_filter_re(daemon): async def test_purge_pending(daemon, monkeypatch): - run_purge = mock.Mock() - monkeypatch.setattr("backy.scheduler.Job.run_purge", run_purge) + run_gc = mock.Mock() + monkeypatch.setattr("backy.daemon.scheduler.Job.run_gc", run_gc) monkeypatch.setattr( "asyncio.sleep", mock.Mock(side_effect=asyncio.CancelledError()) ) @@ -433,4 +433,4 @@ async def test_purge_pending(daemon, monkeypatch): with pytest.raises(asyncio.CancelledError): await daemon.purge_pending_backups() - run_purge.assert_called_once() + run_gc.assert_called_once() diff --git a/src/backy/ext_deps.py b/src/backy/ext_deps.py index 27094000..51212d45 100644 --- a/src/backy/ext_deps.py +++ b/src/backy/ext_deps.py @@ -14,6 +14,10 @@ "BACKY_RBD_CMD", os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), "backy-rbd"), ) +BACKY_S3_CMD = os.environ.get( + "BACKY_S3_CMD", + os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), "backy-s3"), +) CP = os.environ.get("BACKY_CP", "cp") RBD = os.environ.get("BACKY_RBD", "rbd") BACKY_EXTRACT = os.environ.get("BACKY_EXTRACT", "backy-extract") diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index 8f2810f2..c4fda311 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -7,18 +7,16 @@ import structlog from structlog.stdlib import BoundLogger +from backy.source import Source from backy.utils import generate_taskid from .. 
import logging -from backy.source import Source - class FileSource(Source): type_ = "file" - def main(): parser = argparse.ArgumentParser( description="Backup and restore for block devices.", diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index cc897554..274a0f24 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -105,7 +105,7 @@ def main(): success = b.backup(args.revision) ret = int(not success) case "restore": - b.restore(args.revisions, args.target, args.backend) + b.restore(args.revision, args.target, args.restore_backend) case "gc": b.gc() case "verify": diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/backup.py index b1b9579e..6efecf4a 100644 --- a/src/backy/rbd/backup.py +++ b/src/backy/rbd/backup.py @@ -95,9 +95,9 @@ def backup(self, rev_uuid: str) -> bool: "Source is not ready (does it exist? can you access it?)" ) - backend = ChunkedFileBackend(new_revision, self.log) with self.source(new_revision) as source: try: + backend = ChunkedFileBackend(new_revision, self.log) source.backup(backend) verified = source.verify(backend) except BackendException: @@ -129,6 +129,7 @@ def backup(self, rev_uuid: str) -> bool: for revision in reversed(self.get_history(clean=True, local=True)): if revision.trust == Trust.DISTRUSTED: self.log.warning("inconsistent") + backend = ChunkedFileBackend(revision, self.log) backend.verify() break return verified diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py index e5d68503..797fa1c0 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -28,7 +28,8 @@ def test_display_usage(capsys, argv): out, err = capsys.readouterr() assert ( """\ -usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] {backup,restore,gc,verify} ... +usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] + {backup,restore,gc,verify} ... """ == out ) @@ -44,7 +45,8 @@ def test_display_help(capsys, argv): assert ( Ellipsis( """\ -usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] {backup,restore,gc,verify} ... +usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] + {backup,restore,gc,verify} ... Backup and restore for block devices. 
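The repository.py diff below is the counterpart to the reworked CLI: a repository only records which source type it holds, and `RepositoryType` maps that type to an external worker command (`BACKY_RBD_CMD`, `BACKY_S3_CMD`) that does the actual work. A rough sketch of the dispatch pattern used by `Command.backup()` and friends, with the helper name `run_worker` invented here for illustration:

    import subprocess
    from pathlib import Path

    def run_worker(worker: str, taskid: str, backupdir: Path, *args: str) -> int:
        # Hand the task id and repository path to the type-specific
        # subprocess, followed by the subcommand and its arguments.
        proc = subprocess.run(
            [worker, "-t", taskid, "-b", str(backupdir), *args]
        )
        return proc.returncode

    # e.g. run_worker(repo.type.value, taskid, repo.path, "backup", rev.uuid)
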
diff --git a/src/backy/repository.py b/src/backy/repository.py index f3ae1ff9..7a5ca3d3 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -1,6 +1,7 @@ import datetime import fcntl import re +from enum import Enum from math import ceil, floor from pathlib import Path from typing import IO, List, Literal, Optional, TypedDict @@ -19,6 +20,7 @@ unique, ) +from .ext_deps import BACKY_RBD_CMD, BACKY_S3_CMD from .revision import Revision, Trust, filter_schedule_tags from .schedule import Schedule @@ -43,6 +45,21 @@ class StatusDict(TypedDict): local_revs: int +class RepositoryType(Enum): + rbd = BACKY_RBD_CMD + s3 = BACKY_S3_CMD + + @classmethod + def from_str(cls, str: str) -> "RepositoryType": + match str: + case "rbd": + return cls.rbd + case "s3": + return cls.s3 + case _: + raise ValueError("invalid str for RepositoryType: " + str) + + class Repository(object): """A repository of backup revisions of some object.""" @@ -54,6 +71,7 @@ class Repository(object): _by_uuid: dict[str, Revision] _lock_fds: dict[str, IO] + type: RepositoryType def __init__(self, path: Path, log: BoundLogger): self.log = log.bind(subsystem="backup") @@ -75,11 +93,13 @@ def __init__(self, path: Path, log: BoundLogger): self.schedule = Schedule() self.schedule.configure(self.config["schedule"]) + self.type = RepositoryType.from_str(self.config.get("type", "rbd")) + self.scan() @classmethod def init(cls, path: Path, log: BoundLogger, source: Source): - if (path / 'config').exists(): + if (path / "config").exists(): raise RepositoryNotEmpty(path) if not path.exists(): @@ -89,7 +109,7 @@ def init(cls, path: Path, log: BoundLogger, source: Source): config = {"schedule": {}, "source": source_config} - with open(path / 'config', 'w') as f: + with open(path / "config", "w") as f: yaml.dump(config, f) log.info(f"Initialized empty repository in {path}") @@ -218,17 +238,6 @@ def validate_tags(self, tags): ) raise RuntimeError("Unknown tags") - def warn_pending_changes(self, revs: Optional[List[Revision]] = None): - revs = revs if revs is not None else self.history - pending = [r for r in revs if r.pending_changes] - if pending: - self.log.warning( - "pending-changes", - _fmt_msg="Synchronize with remote server (backy push) or " - "risk loosing changes", - revisions=",".join(r.uuid for r in pending), - ) - def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): revs = revs if revs is not None else self.history remote = [r for r in revs if r.server] diff --git a/src/backy/revision.py b/src/backy/revision.py index 7cb06a99..ef31a338 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -60,13 +60,13 @@ def __init__( @classmethod def create( cls, - backup: "Repository", + repository: "Repository", tags: set[str], log: BoundLogger, *, uuid: Optional[str] = None, ) -> "Revision": - r = Revision(backup, log, uuid) + r = Revision(repository, log, uuid) r.tags = tags return r diff --git a/src/backy/source.py b/src/backy/source.py index 75a277f9..526476b8 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -1,10 +1,11 @@ +from importlib.metadata import entry_points from pathlib import Path from typing import Any from structlog.stdlib import BoundLogger -from importlib.metadata import entry_points -SOURCE_PLUGINS = entry_points(group='backy.sources') +SOURCE_PLUGINS = entry_points(group="backy.sources") + def factory_by_type(type_): return SOURCE_PLUGINS[type_].load() From 9666d3e9c054cd838b7188273ea78eb9363264d2 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Thu, 27 Jun 2024 
09:30:42 +0200 Subject: [PATCH 14/25] more fixed tests: backy.main -> backy.cli --- src/backy/cli/tests/test_main.py | 64 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index 28fa6dbb..0ac9d311 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -10,7 +10,7 @@ from backy import utils from backy.revision import Revision from backy.tests import Ellipsis - +import backy.cli @pytest.fixture def argv(): @@ -23,7 +23,7 @@ def argv(): def test_display_usage(capsys, argv): with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( @@ -41,7 +41,7 @@ def test_display_usage(capsys, argv): def test_display_client_usage(capsys, argv): argv.append("client") with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( @@ -57,7 +57,7 @@ def test_display_client_usage(capsys, argv): def test_display_help(capsys, argv): argv.append("--help") with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( @@ -82,7 +82,7 @@ def test_display_help(capsys, argv): def test_display_client_help(capsys, argv): argv.extend(["client", "--help"]) with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( @@ -105,7 +105,7 @@ def test_verbose_logging(capsys, argv): # for -v is covered. argv.extend(["-v"]) with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 @@ -120,17 +120,17 @@ async def async_print_args(*args, **kw): def test_call_status(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.main.Command, "status", print_args) + monkeypatch.setattr(backy.cli.Command, "status", print_args) argv.extend(["-v", "-b", str(backup.path), "status"]) utils.log_data = "" with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( Ellipsis( """\ -(,) +(,) {'revision': 'all', 'yaml_': False} """ ) @@ -179,7 +179,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): argv.extend(["-v", "backup", "manual:test"]) utils.log_data = "" with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() out, err = capsys.readouterr() assert ( Ellipsis( @@ -205,17 +205,17 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): def test_call_find(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.main.Command, "find", print_args) + monkeypatch.setattr(backy.cli.Command, "find", print_args) argv.extend(["-v", "-b", str(backup.path), "find", "-r", "1"]) utils.log_data = "" with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( Ellipsis( """\ -(,) +(,) {'revision': '1', 'uuid': False} """ ) @@ -269,7 +269,7 @@ def test_call_client( argv.extend(["-v", "client", "-c", conf, action, *args.values()]) utils.log_data = "" with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( @@ -297,7 +297,7 @@ def test_call_client( def test_call_scheduler(capsys, backup, argv, monkeypatch, 
tmp_path): - monkeypatch.setattr(backy.main.Command, "scheduler", print_args) + monkeypatch.setattr(backy.cli.Command, "scheduler", print_args) argv.extend( [ "-v", @@ -310,13 +310,13 @@ def test_call_scheduler(capsys, backup, argv, monkeypatch, tmp_path): ) utils.log_data = "" with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( Ellipsis( """\ -(,) +(,) {'config': PosixPath('/etc/backy.conf')} """ ) @@ -337,18 +337,18 @@ def test_call_scheduler(capsys, backup, argv, monkeypatch, tmp_path): @pytest.mark.parametrize("action", ["set", "add", "remove"]) def test_call_tags(capsys, backup, argv, monkeypatch, action): - monkeypatch.setattr(backy.main.Command, "tags", print_args) + monkeypatch.setattr(backy.cli.Command, "tags", print_args) argv.extend( ["-v", "-b", str(backup.path), "tags", action, "-r", "last", "manual:a"] ) with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( Ellipsis( f"""\ -(,) +(,) {{'action': '{action}', 'autoremove': False, 'expect': None, @@ -375,16 +375,16 @@ def test_call_tags(capsys, backup, argv, monkeypatch, action): def test_call_expire(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.main.Command, "expire", print_args) + monkeypatch.setattr(backy.cli.Command, "expire", print_args) argv.extend(["-v", "-b", str(backup.path), "expire"]) with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( Ellipsis( """\ -(,) +(,) {} """ ) @@ -406,7 +406,7 @@ def test_call_expire(capsys, backup, argv, monkeypatch): @pytest.mark.parametrize("action", ["pull", "push"]) def test_call_pull_push(capsys, backup, argv, monkeypatch, tmp_path, action): - monkeypatch.setattr(backy.main.Command, action, print_args) + monkeypatch.setattr(backy.cli.Command, action, print_args) conf = tmp_path / "conf" with open(conf, "w") as c: c.write( @@ -427,13 +427,13 @@ def test_call_pull_push(capsys, backup, argv, monkeypatch, tmp_path, action): argv.extend(["-v", "-b", str(backup.path), action, "-c", str(conf)]) utils.log_data = "" with pytest.raises(SystemExit) as exit: - backy.main.main() + backy.cli.main() assert exit.value.code == 0 out, err = capsys.readouterr() assert ( Ellipsis( f"""\ -(,) +(,) {{'config': {repr(conf)}}} """ ) @@ -458,7 +458,7 @@ def test_call_unexpected_exception( def do_raise(*args, **kw): raise RuntimeError("test") - monkeypatch.setattr(backy.main.Command, "status", do_raise) + monkeypatch.setattr(backy.cli.Command, "status", do_raise) import os monkeypatch.setattr(os, "_exit", lambda x: None) @@ -468,7 +468,7 @@ def do_raise(*args, **kw): ) utils.log_data = "" with pytest.raises(SystemExit): - backy.main.main() + backy.cli.main() out, err = capsys.readouterr() assert "" == out assert ( @@ -478,9 +478,9 @@ def do_raise(*args, **kw): ... D command/parsed func='status' func_args={'yaml_': False, 'revision': 'all'} ... 
E command/failed exception_class='builtins.RuntimeError' exception_msg='test' exception>\tTraceback (most recent call last): -exception>\t File ".../src/backy/main.py", line ..., in main +exception>\t File ".../src/backy/cli/__init__.py", line ..., in main exception>\t ret = func(**func_args) -exception>\t File ".../src/backy/tests/test_main.py", line ..., in do_raise +exception>\t File ".../src/backy/cli/tests/test_main.py", line ..., in do_raise exception>\t raise RuntimeError("test") exception>\tRuntimeError: test """ @@ -492,7 +492,7 @@ def do_raise(*args, **kw): def test_commands_wrapper_status( backup, tmp_path, capsys, clock, tz_berlin, log ): - commands = backy.main.Command(tmp_path, "AAAA", log) + commands = backy.cli.Command(tmp_path, "AAAA", log) revision1 = Revision.create(backup, {"daily"}, log, uuid="1") revision1.materialize() @@ -535,7 +535,7 @@ def test_commands_wrapper_status( def test_commands_wrapper_status_yaml( backup, tmp_path, capsys, clock, tz_berlin, log ): - commands = backy.main.Command(tmp_path, "AAAA", log) + commands = backy.cli.Command(tmp_path, "AAAA", log) revision = Revision.create(backup, set(), log, uuid="1") revision.stats["duration"] = 3.5 From fe9970c6f2b27af725451ee40992431ae2d4d1b4 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Thu, 27 Jun 2024 11:08:09 +0200 Subject: [PATCH 15/25] snapshot: implement a simple file source --- src/backy/cli/tests/test_main.py | 5 +-- src/backy/file/__init__.py | 53 +++++++++++++++++++++++++++---- src/backy/file/tests/test_file.py | 26 +++++++++++++++ src/backy/logging.py | 25 +++++++++++---- src/backy/repository.py | 22 ++++++++++--- src/backy/source.py | 31 ++++++++++++++++++ 6 files changed, 143 insertions(+), 19 deletions(-) create mode 100644 src/backy/file/tests/test_file.py diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index 0ac9d311..4ed9823e 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -11,6 +11,7 @@ from backy.revision import Revision from backy.tests import Ellipsis import backy.cli +import backy.cli.client @pytest.fixture def argv(): @@ -248,7 +249,7 @@ def test_call_find(capsys, backup, argv, monkeypatch): def test_call_client( capsys, backup, argv, monkeypatch, log, tmp_path, action, args ): - monkeypatch.setattr(backy.client.CLIClient, action, async_print_args) + monkeypatch.setattr(backy.cli.client.CLIClient, action, async_print_args) conf = str(tmp_path / "conf") with open(conf, "w") as c: c.write( @@ -275,7 +276,7 @@ def test_call_client( assert ( Ellipsis( f"""\ -(,) +(,) {args} """ ) diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index c4fda311..1f2a3c04 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -1,12 +1,14 @@ import argparse import errno -import os +import shutil import sys from pathlib import Path import structlog from structlog.stdlib import BoundLogger +import backy.repository +from backy.revision import Revision from backy.source import Source from backy.utils import generate_taskid @@ -16,10 +18,51 @@ class FileSource(Source): type_ = "file" + path: Path # the source we are backing up + + def __init__( + self, + path: Path, + repository: backy.repository.Repository, + log: BoundLogger, + ): + self.path = path + self.repository = repository + self.log = log + + def _path_for_revision(self, revision: Revision) -> Path: + return self.repository.path / revision.uuid + + def backup(self, revision: Revision): + backup = self._path_for_revision(revision) + 
assert not backup.exists() + shutil.copy(self.path, backup) + + def restore(self, revision: Revision, target: Path): + shutil.copy(self._path_for_revision(revision), target) + + def gc(self): + files = set(self.repository.path.glob("*")) + expected_files = set( + (self.repository.path / r.uuid) + for r in self.repository.get_history() + ) + for file in files - expected_files: + file.unlink() + + def verify(self): + for revision in self.repository.get_history(): + assert (self.path / revision.uuid).exists() + def main(): parser = argparse.ArgumentParser( - description="Backup and restore for block devices.", + description="""Backup and restore for individual files. + +This is mostly a dummy implementation to assist testing and development: +it is only able to back up from a single file and store versions of it +in a very simplistic fashion. +""", ) parser.add_argument( "-v", "--verbose", action="store_true", help="verbose output" @@ -73,7 +116,7 @@ def main(): # GC p = subparsers.add_parser( "gc", - help="Purge the backup store from unused data", + help="Remove unused data from the repository.", ) p.set_defaults(func="gc") @@ -87,8 +130,6 @@ def main(): args = parser.parse_args() - os.chdir(args.C) - if not hasattr(args, "func"): parser.print_usage() sys.exit(0) @@ -103,7 +144,7 @@ def main(): log.debug("invoked", args=" ".join(sys.argv)) try: - b = RbdRepository(args.backupdir, log) + b = FileSource(args.backupdir, log) # XXX scheduler? b._clean() ret = 0 diff --git a/src/backy/file/tests/test_file.py b/src/backy/file/tests/test_file.py new file mode 100644 index 00000000..baeb0417 --- /dev/null +++ b/src/backy/file/tests/test_file.py @@ -0,0 +1,26 @@ +from backy.file import FileSource +from backy.repository import Repository +from backy.revision import Revision + + +def test_simple_cycle(tmp_path, log): + original = tmp_path / "original.txt" + with open(original, "w") as f: + f.write("This is the original file.") + + repo_path = tmp_path / "repository" + repository = Repository.init(repo_path, log, FileSource) + + source = FileSource(original, repository, log) + + revision = Revision.create(repository, {"test"}, log) + source.backup(revision) + + with open(original, "w") as f: + f.write("This is the wrong file.") + + assert original.read_text() == "This is the wrong file." + + source.restore(revision, original) + + assert original.read_text() == "This is the original file." 
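FileSource and its round-trip test complete the plugin story from patch 11: a source subclasses `Source`, is registered under the `backy.sources` entry point group in pyproject.toml, and a configured type name is resolved at runtime. A condensed sketch of that lookup, along the lines of `factory_by_type()` in backy/source.py (the name `load_source_class` is illustrative):

    from importlib.metadata import entry_points

    SOURCE_PLUGINS = entry_points(group="backy.sources")

    def load_source_class(type_: str):
        # Entry-point lookup, e.g. "file" -> backy.file:FileSource.
        try:
            return SOURCE_PLUGINS[type_].load()
        except KeyError:
            raise ValueError(f"unknown source type: {type_!r}") from None
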
diff --git a/src/backy/logging.py b/src/backy/logging.py
index 561a9dad..da36ff32 100644
--- a/src/backy/logging.py
+++ b/src/backy/logging.py
@@ -118,7 +118,9 @@ def __getattr__(self, name):
 
 
 def prefix(prefix, line):
-    return "{}>\t".format(prefix) + line.replace("\n", "\n{}>\t".format(prefix))
+    return "{}>\t".format(prefix) + line.replace(
+        "\n", "\n{}>\t".format(prefix)
+    )
 
 
 class ConsoleFileRenderer:
@@ -143,7 +145,9 @@ def __init__(self, min_level, pad_event=_EVENT_WIDTH):
         self.min_level = self.LEVELS.index(min_level.lower())
         if colorama is None:
             print(
-                _MISSING.format(who=self.__class__.__name__, package="colorama")
+                _MISSING.format(
+                    who=self.__class__.__name__, package="colorama"
+                )
             )
         if COLORIZED_TTY_OUTPUT:
             colorama.init()
@@ -212,7 +216,10 @@ def write(line):
         level = event_dict.pop("level", None)
         if level is not None:
             write(
-                self._level_to_color[level] + level[0].upper() + RESET_ALL + " "
+                self._level_to_color[level]
+                + level[0].upper()
+                + RESET_ALL
+                + " "
             )
 
         job_name = event_dict.pop("job_name", "-")
@@ -232,10 +239,14 @@ def write(line):
             + RESET_ALL
             + " "
         )
-        if len(subsystem + event) > self._pad_event and hasattr(
-            utils, "log_data"
-        ):
-            raise RuntimeWarning("logline to long: " + subsystem + event)
+
+        test_mode = hasattr(utils, "log_data")
+        if test_mode and len(subsystem + event) > self._pad_event:
+            raise RuntimeWarning(
+                "subsystem and/or event names are too long: "
+                + subsystem
+                + event
+            )
 
         logger_name = event_dict.pop("logger", None)
         if logger_name is not None:
diff --git a/src/backy/repository.py b/src/backy/repository.py
index 7a5ca3d3..839ccef5 100644
--- a/src/backy/repository.py
+++ b/src/backy/repository.py
@@ -61,7 +61,19 @@ def from_str(cls, str: str) -> "RepositoryType":
 
 
 class Repository(object):
-    """A repository of backup revisions of some object."""
+    """A repository stores and manages backups for a single source.
+
+    The repository handles metadata information around backups, manages the
+    schedule and tags and can expire revisions.
+
+    A single backup for something (an RBD disk image, an S3 pool of
+    buckets, ...) is called a revision and thus we use "backup" synonymously
+    with "revision".
+
+    The actual implementation of making and restoring backups as well as
+    storing the data is provided by the `source` implementations.
+
+    """
 
     path: Path
     config: dict
@@ -71,7 +83,7 @@ class Repository(object):
     _by_uuid: dict[str, Revision]
     _lock_fds: dict[str, IO]
 
-    type: RepositoryType
+    type_: RepositoryType
 
     def __init__(self, path: Path, log: BoundLogger):
         self.log = log.bind(subsystem="backup")
@@ -98,7 +110,7 @@ def __init__(self, path: Path, log: BoundLogger):
         self.scan()
 
     @classmethod
-    def init(cls, path: Path, log: BoundLogger, source: Source):
+    def init(cls, path: Path, log: BoundLogger, source: type[Source]) -> "Repository":
         if (path / "config").exists():
             raise RepositoryNotEmpty(path)
 
@@ -112,7 +124,9 @@ def init(cls, path: Path, log: BoundLogger, source: Source):
         with open(path / "config", "w") as f:
             yaml.dump(config, f)
 
-        log.info(f"Initialized empty repository in {path}")
+        log.info("repo-initialized", path=path)
+
+        return cls(path, log)
 
     @property
     def problem_reports(self) -> list[str]:
diff --git a/src/backy/source.py b/src/backy/source.py
index 526476b8..9e090049 100644
--- a/src/backy/source.py
+++ b/src/backy/source.py
@@ -12,6 +12,37 @@ def factory_by_type(type_):
 
 
 class Source:
+    """A source provides specific implementations for making and restoring
+    backups.
+
+    There are three major aspects provided by a source implementation:
+
+    1. Extracting data from another system (e.g. Ceph RBD or S3).
+
+    2. Storing that data in the repository directory.
+
+    3. Restoring data, typically providing different workflows:
+
+      - full restore into the original system (e.g. into an RBD image)
+      - full restore into another system (e.g. into a local image file)
+      - partial restore (e.g. allowing interactive access to a loop-mounted version of the image)
+
+    Additionally a few housekeeping tasks need to be implemented:
+
+    - garbage collection, to remove data that isn't needed after revisions
+      have expired
+
+    - verification of stored data to protect against low-level corruption
+
+
+    Implementations can be split into two parts:
+
+    - a light shim as a Python class that can interact with the
+      rest of the backy code within Python
+
+    - a subprocess that backy interacts with to trigger the actual work.
+
+    """
 
     type_: str
     config: dict[str, Any]

From 1892c42dae3bd734d8b831635fc5d9222f85b025 Mon Sep 17 00:00:00 2001
From: Johann Bahl
Date: Thu, 27 Jun 2024 15:09:24 +0200
Subject: [PATCH 16/25] snapshot

---
 src/backy/cli/__init__.py                    |  40 ++--
 src/backy/cli/client.py                      | 202 -------------------
 src/backy/file/__init__.py                   |  25 ++-
 src/backy/file/tests/test_file.py            |   8 +-
 src/backy/rbd/__init__.py                    |   4 +-
 src/backy/rbd/conftest.py                    |   4 +-
 src/backy/rbd/{backup.py => rbdsource.py}    |  68 +++----
 src/backy/rbd/sources/__init__.py            |   4 +-
 src/backy/rbd/sources/ceph/source.py         |   8 +-
 src/backy/rbd/sources/file.py                |   8 +-
 src/backy/rbd/sources/flyingcircus/source.py |   4 +-
 src/backy/rbd/tests/test_backy-rbd.py        |   4 +-
 src/backy/rbd/tests/test_main.py             |   4 +-
 src/backy/rbd/tests/test_source.py           |   4 +-
 src/backy/repository.py                      |  48 +++--
 src/backy/source.py                          |  27 ++-
 16 files changed, 142 insertions(+), 320 deletions(-)
 delete mode 100644 src/backy/cli/client.py
 rename src/backy/rbd/{backup.py => rbdsource.py} (81%)

diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py
index aee60704..53d0e616 100644
--- a/src/backy/cli/__init__.py
+++ b/src/backy/cli/__init__.py
@@ -1,4 +1,5 @@
 import argparse
+import asyncio
 import errno
 import os
 import re
@@ -20,9 +21,11 @@
 import backy.daemon
 import backy.source
 from backy import logging
+from backy.daemon import BackyDaemon
+from backy.daemon.api import Client
 
 # XXX invert this dependency
-from backy.rbd.backup import RestoreBackend
+from backy.rbd.rbdsource import RestoreBackend
 from backy.repository import Repository
 from backy.revision import Revision
 from backy.utils import format_datetime_local, generate_taskid
@@ -71,11 +74,13 @@ class Command(object):
     """Proxy between CLI calls and actual backup code."""
 
     path: Path
+    config: Path
     taskid: str
     log: BoundLogger
 
-    def __init__(self, path: Path, taskid, log: BoundLogger):
+    def __init__(self, path: Path, config: Path, taskid, log: BoundLogger):
         self.path = path.resolve()
+        self.config = config
         self.taskid = taskid
         self.log = log
 
@@ -87,6 +92,13 @@ def __call__(self, cmdname: str, args: dict[str, Any]):
         self.log.debug("return-code", code=ret)
         return ret
 
+    def create_api_client(self):
+        d = BackyDaemon(self.config, self.log)
+        d._read_config()
+        return Client.from_conf(
+            "", d.api_cli_default, self.taskid, self.log
+        )
+
     def init(self, type):
         source = backy.source.factory_by_type(type)
         Repository.init(self.path, self.log, source=source)
@@ -186,7 +198,7 @@ def backup(self, tags: str, force: bool) -> int:
 
     def restore(
         self, revision: str, target: str, restore_backend: RestoreBackend
-    )
-> None: + ) -> int: b = Repository(self.path, self.log) r = b.find(revision) proc = subprocess.run( @@ -209,7 +221,7 @@ def distrust(self, revision: str) -> None: b = Repository(self.path, self.log) b.distrust(revision) - def verify(self, revision: str) -> None: + def verify(self, revision: str) -> int: b = Repository(self.path, self.log) r = b.find(revision) proc = subprocess.run( @@ -278,8 +290,9 @@ def pull(self): def check(self): pass - def jobs(self, filter_re=""): + def show_jobs(self, filter_re="") -> int: """List status of all known jobs. Optionally filter by regex.""" + api = self.create_api_client() tz = format_datetime_local(None)[1] @@ -295,7 +308,7 @@ def jobs(self, filter_re=""): "Next Tags", ) - jobs = self.api.fetch_status(filter_re) + jobs = asyncio.run(api.fetch_status(filter_re)) jobs.sort(key=lambda j: j["job"]) for job in jobs: overdue = ( @@ -322,7 +335,7 @@ def jobs(self, filter_re=""): next_time, job["next_tags"], ) - backups = self.api.list_backups() + backups = asyncio.run(api.list_backups()) if filter_re: backups = list(filter(re.compile(filter_re).search, backups)) for b in backups: @@ -331,12 +344,13 @@ def jobs(self, filter_re=""): rprint(t) print("{} jobs shown".format(len(jobs) + len(backups))) - def status(self): + def show_daemon(self): """Show job status overview""" + api = self.create_api_client() t = Table("Status", "#") state_summary: Dict[str, int] = {} - jobs = self.api.get_jobs() - jobs += [{"status": "Dead"} for _ in self.api.list_backups()] + jobs = asyncio.run(api.get_jobs()) + jobs += [{"status": "Dead"} for _ in asyncio.run(api.list_backups())] for job in jobs: state_summary.setdefault(job["status"], 0) state_summary[job["status"]] += 1 @@ -443,10 +457,10 @@ def main(): nargs="?", help="Optional job filter regex", ) - p.set_defaults(func="show-jobs") + p.set_defaults(func="show_jobs") p = subparsers.add_parser("show-daemon", help="Show job status overview") - p.set_defaults(func="show-daemon") + p.set_defaults(func="show_daemon") p = subparsers.add_parser( "check", @@ -623,7 +637,7 @@ def main(): log = structlog.stdlib.get_logger(subsystem="command") log.debug("invoked", args=" ".join(sys.argv)) - command = Command(args.C, task_id, log) + command = Command(args.C, args.config, task_id, log) func = args.func # Pass over to function diff --git a/src/backy/cli/client.py b/src/backy/cli/client.py deleted file mode 100644 index 1aebf006..00000000 --- a/src/backy/cli/client.py +++ /dev/null @@ -1,202 +0,0 @@ -import re -import sys -from typing import TYPE_CHECKING, Dict, List, Optional, Pattern - -import humanize -from aiohttp import ClientResponseError -from aiohttp.web_exceptions import HTTPNotFound -from rich import print as rprint -from rich.table import Column, Table -from structlog.stdlib import BoundLogger - -from backy.repository import StatusDict -from backy.utils import format_datetime_local - -if TYPE_CHECKING: - from backy.daemon import BackyDaemon - - -# XXX this is partially duplicated in the daemon -def status(self, filter_re: Optional[Pattern[str]] = None) -> List[StatusDict]: - """Collects status information for all jobs.""" - # XXX with a database backend, we can evaluate this in live actually - # so this should move to the CLI client - result: List["BackyDaemon.StatusDict"] = [] - for job in list(self.jobs.values()): - if filter_re and not filter_re.search(job.name): - continue - job.repository.scan() - manual_tags = set() - unsynced_revs = 0 - history = job.repository.clean_history - for rev in history: - manual_tags |= 
filter_manual_tags(rev.tags) - if rev.pending_changes: - unsynced_revs += 1 - result.append( - dict( - job=job.name, - sla="OK" if job.sla else "TOO OLD", - sla_overdue=job.sla_overdue, - status=job.status, - last_time=history[-1].timestamp if history else None, - last_tags=( - ",".join(job.schedule.sorted_tags(history[-1].tags)) - if history - else None - ), - last_duration=( - history[-1].stats.get("duration", 0) if history else None - ), - next_time=job.next_time, - next_tags=( - ",".join(job.schedule.sorted_tags(job.next_tags)) - if job.next_tags - else None - ), - manual_tags=", ".join(manual_tags), - quarantine_reports=len(job.repository.quarantine.report_ids), - unsynced_revs=unsynced_revs, - local_revs=len( - job.repository.get_history(clean=True, local=True) - ), - ) - ) - return result - - -class CLIClient: - log: BoundLogger - - def __init__(self, apiclient, log): - self.api = apiclient - self.log = log.bind(subsystem="CLIClient") - - async def __aenter__(self) -> "CLIClient": - await self.api.__aenter__() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.api.__aexit__(exc_type, exc_val, exc_tb) - - async def jobs(self, filter_re=""): - """List status of all known jobs. Optionally filter by regex.""" - - tz = format_datetime_local(None)[1] - - t = Table( - "Job", - "SLA", - "SLA overdue", - "Status", - f"Last Backup ({tz})", - "Last Tags", - Column("Last Duration", justify="right"), - f"Next Backup ({tz})", - "Next Tags", - ) - - jobs = await self.api.fetch_status(filter_re) - jobs.sort(key=lambda j: j["job"]) - for job in jobs: - overdue = ( - humanize.naturaldelta(job["sla_overdue"]) - if job["sla_overdue"] - else "-" - ) - last_duration = ( - humanize.naturaldelta(job["last_duration"]) - if job["last_duration"] - else "-" - ) - last_time = format_datetime_local(job["last_time"])[0] - next_time = format_datetime_local(job["next_time"])[0] - - t.add_row( - job["job"], - job["sla"], - overdue, - job["status"], - last_time, - job["last_tags"], - last_duration, - next_time, - job["next_tags"], - ) - backups = await self.api.list_backups() - if filter_re: - backups = list(filter(re.compile(filter_re).search, backups)) - for b in backups: - t.add_row(b, "-", "-", "Dead", "-", "", "-", "-", "") - - rprint(t) - print("{} jobs shown".format(len(jobs) + len(backups))) - - async def status(self): - """Show job status overview""" - t = Table("Status", "#") - state_summary: Dict[str, int] = {} - jobs = await self.api.get_jobs() - jobs += [{"status": "Dead"} for _ in await self.api.list_backups()] - for job in jobs: - state_summary.setdefault(job["status"], 0) - state_summary[job["status"]] += 1 - - for state in sorted(state_summary): - t.add_row(state, str(state_summary[state])) - rprint(t) - - async def run(self, job: str): - """Trigger immediate run for one job""" - try: - await self.api.run_job(job) - except ClientResponseError as e: - if e.status == HTTPNotFound.status_code: - self.log.error("unknown-job", job=job) - sys.exit(1) - raise - self.log.info("triggered-run", job=job) - - async def runall(self): - """Trigger immediate run for all jobs""" - jobs = await self.api.get_jobs() - for job in jobs: - await self.run(job["name"]) - - async def reload(self): - """Reload the configuration.""" - self.log.info("reloading-daemon") - await self.api.reload_daemon() - self.log.info("reloaded-daemon") - - async def check(self): - status = await self.api.fetch_status() - - exitcode = 0 - - for job in status: - log = self.log.bind(job_name=job["job"]) - if 
job["manual_tags"]: - log.info( - "check-manual-tags", - manual_tags=job["manual_tags"], - ) - if job["unsynced_revs"]: - self.log.info( - "check-unsynced-revs", unsynced_revs=job["unsynced_revs"] - ) - if job["sla"] != "OK": - log.critical( - "check-sla-violation", - last_time=str(job["last_time"]), - sla_overdue=job["sla_overdue"], - ) - exitcode = max(exitcode, 2) - if job["quarantine_reports"]: - log.warning( - "check-quarantined", reports=job["quarantine_reports"] - ) - exitcode = max(exitcode, 1) - - self.log.info("check-exit", exitcode=exitcode, jobs=len(status)) - raise SystemExit(exitcode) diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index 1f2a3c04..0e0adef3 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -3,32 +3,35 @@ import shutil import sys from pathlib import Path +from typing import Any import structlog from structlog.stdlib import BoundLogger -import backy.repository from backy.revision import Revision from backy.source import Source from backy.utils import generate_taskid from .. import logging +from ..repository import Repository class FileSource(Source): - type_ = "file" - path: Path # the source we are backing up def __init__( - self, - path: Path, - repository: backy.repository.Repository, - log: BoundLogger, + self, repository: Repository, config: dict[str, Any], log: BoundLogger ): - self.path = path - self.repository = repository - self.log = log + super().__init__(repository, log) + self.path = Path(config["path"]) + + @property + def subcommand(self) -> str: + return "backy-file" + + @staticmethod + def to_config(path: Path) -> dict[str, Any]: + return {"path": str(path)} def _path_for_revision(self, revision: Revision) -> Path: return self.repository.path / revision.uuid @@ -52,7 +55,7 @@ def gc(self): def verify(self): for revision in self.repository.get_history(): - assert (self.path / revision.uuid).exists() + assert self._path_for_revision(revision).exists() def main(): diff --git a/src/backy/file/tests/test_file.py b/src/backy/file/tests/test_file.py index baeb0417..f31b78b8 100644 --- a/src/backy/file/tests/test_file.py +++ b/src/backy/file/tests/test_file.py @@ -1,3 +1,5 @@ +from typing import cast + from backy.file import FileSource from backy.repository import Repository from backy.revision import Revision @@ -9,9 +11,11 @@ def test_simple_cycle(tmp_path, log): f.write("This is the original file.") repo_path = tmp_path / "repository" - repository = Repository.init(repo_path, log, FileSource) - source = FileSource(original, repository, log) + repository = Repository.init( + repo_path, FileSource, FileSource.to_config(repo_path), log + ) + source = cast(FileSource, repository.get_source()) revision = Revision.create(repository, {"test"}, log) source.backup(revision) diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index 274a0f24..483aa5d0 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -8,7 +8,7 @@ from backy.utils import generate_taskid from .. 
import logging -from .backup import RbdRepository, RestoreBackend +from .rbdsource import RbdSource, RestoreBackend def main(): @@ -98,7 +98,7 @@ def main(): log.debug("invoked", args=" ".join(sys.argv)) try: - b = RbdRepository(args.backupdir, log) + b = RbdSource(args.backupdir, log) ret = 0 match args.func: case "backup": diff --git a/src/backy/rbd/conftest.py b/src/backy/rbd/conftest.py index e475bda5..f248141c 100644 --- a/src/backy/rbd/conftest.py +++ b/src/backy/rbd/conftest.py @@ -3,7 +3,7 @@ import pytest -from backy.rbd import RbdRepository +from backy.rbd import RbdSource from backy.revision import Revision fixtures = os.path.dirname(__file__) + "/tests/samples" @@ -22,7 +22,7 @@ def rbdrepository(schedule, tmp_path, log): }, f, ) - return RbdRepository(tmp_path, log) + return RbdSource(tmp_path, log) def create_rev(rbdrepository, tags): diff --git a/src/backy/rbd/backup.py b/src/backy/rbd/rbdsource.py similarity index 81% rename from src/backy/rbd/backup.py rename to src/backy/rbd/rbdsource.py index 6efecf4a..8f1eaba5 100644 --- a/src/backy/rbd/backup.py +++ b/src/backy/rbd/rbdsource.py @@ -3,7 +3,7 @@ import time from enum import Enum from pathlib import Path -from typing import IO +from typing import IO, Any from structlog.stdlib import BoundLogger @@ -12,6 +12,7 @@ from ..ext_deps import BACKY_EXTRACT from ..repository import Repository from ..revision import Revision, Trust +from ..source import Source from ..utils import CHUNK_SIZE, copy, posix_fadvise from .chunked import ChunkedFileBackend from .chunked.chunk import BackendException @@ -41,7 +42,7 @@ def __str__(self): return self.value -class RbdRepository(Repository): +class RbdSource(Source): """A backup of a VM. Provides access to methods to @@ -53,8 +54,7 @@ class RbdRepository(Repository): source: BackySourceFactory quarantine: QuarantineStore - def __init__(self, path: Path, log: BoundLogger): - super().__init__(path, log) + def __init__(self, config: dict[str, Any], log: BoundLogger): # Initialize our source try: @@ -72,6 +72,10 @@ def __init__(self, path: Path, log: BoundLogger): self.quarantine = QuarantineStore(self.path, self.log) + @property + def subcommand(self) -> str: + return "backy-rbd" + @property def problem_reports(self): return [f"{len(self.quarantine.report_ids)} quarantined blocks"] @@ -81,12 +85,9 @@ def problem_reports(self): @Repository.locked(target=".backup", mode="exclusive") @Repository.locked(target=".purge", mode="shared") - def backup(self, rev_uuid: str) -> bool: - new_revision = self.find_by_uuid(rev_uuid) - self.prevent_remote_rev([new_revision]) - - self.path.joinpath("last").unlink(missing_ok=True) - self.path.joinpath("last.rev").unlink(missing_ok=True) + def backup(self, revision: Revision) -> bool: + self.repository.path.joinpath("last").unlink(missing_ok=True) + self.repository.path.joinpath("last.rev").unlink(missing_ok=True) start = time.time() @@ -95,28 +96,26 @@ def backup(self, rev_uuid: str) -> bool: "Source is not ready (does it exist? 
can you access it?)" ) - with self.source(new_revision) as source: + with self.source(revision) as source: try: - backend = ChunkedFileBackend(new_revision, self.log) + backend = ChunkedFileBackend(revision, self.log) source.backup(backend) verified = source.verify(backend) except BackendException: self.log.exception("backend-error-distrust-all") verified = False - self.distrust("local", skip_lock=True) + self.repository.distrust("local", skip_lock=True) if not verified: self.log.error( "verification-failed", - revision_uuid=new_revision.uuid, + revision_uuid=revision.uuid, ) - new_revision.remove() + revision.remove() else: - self.log.info( - "verification-ok", revision_uuid=new_revision.uuid - ) - new_revision.stats["duration"] = time.time() - start - new_revision.write_info() - new_revision.readonly() + self.log.info("verification-ok", revision_uuid=revision.uuid) + revision.stats["duration"] = time.time() - start + revision.write_info() + revision.readonly() # Switched from a fine-grained syncing mechanism to "everything # once" when we're done. This is as safe but much faster. os.sync() @@ -125,8 +124,11 @@ def backup(self, rev_uuid: str) -> bool: # verification after a backup - for good measure and to keep things # moving along automatically. This could also be moved into the # scheduler. - self.scan() - for revision in reversed(self.get_history(clean=True, local=True)): + self.repository.scan() + # TODO: move this to cli/daemon? + for revision in reversed( + self.repository.get_history(clean=True, local=True) + ): if revision.trust == Trust.DISTRUSTED: self.log.warning("inconsistent") backend = ChunkedFileBackend(revision, self.log) @@ -135,15 +137,14 @@ def backup(self, rev_uuid: str) -> bool: return verified @Repository.locked(target=".purge", mode="shared") - def verify(self, revision: str) -> None: - rev = self.find_by_uuid(revision) - self.prevent_remote_rev([rev]) - ChunkedFileBackend(rev, self.log).verify() + def verify(self, revision: Revision) -> None: + ChunkedFileBackend(revision, self.log).verify() @Repository.locked(target=".purge", mode="exclusive") def gc(self) -> None: - ChunkedFileBackend(self.local_history[-1], self.log).purge() - self.clear_purge_pending() + ChunkedFileBackend(self.repository.local_history[-1], self.log).purge() + # TODO: move this to cli/daemon? + self.repository.clear_purge_pending() ################# # Restoring @@ -152,12 +153,11 @@ def gc(self) -> None: # restore_stdout and locking isn't re-entrant. 
def restore( self, - revision: str, + revision: Revision, target: str, restore_backend: RestoreBackend = RestoreBackend.AUTO, ) -> None: - r = self.find_by_uuid(revision) - s = ChunkedFileBackend(r, self.log).open("rb") + s = ChunkedFileBackend(revision, self.log).open("rb") if restore_backend == RestoreBackend.AUTO: if self.backy_extract_supported(s): restore_backend = RestoreBackend.RUST @@ -171,7 +171,7 @@ def restore( else: self.restore_stdout(source) elif restore_backend == RestoreBackend.RUST: - self.restore_backy_extract(r, target) + self.restore_backy_extract(revision, target) def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: log = self.log.bind(subsystem="backy-extract") @@ -195,7 +195,7 @@ def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: # backy-extract acquires lock def restore_backy_extract(self, rev: Revision, target: str) -> None: log = self.log.bind(subsystem="backy-extract") - cmd = [BACKY_EXTRACT, str(self.path / rev.uuid), target] + cmd = [BACKY_EXTRACT, str(self.repository.path / rev.uuid), target] log.debug("started", cmd=cmd) proc = subprocess.Popen(cmd) return_code = proc.wait() diff --git a/src/backy/rbd/sources/__init__.py b/src/backy/rbd/sources/__init__.py index 9f632edf..28b0327d 100644 --- a/src/backy/rbd/sources/__init__.py +++ b/src/backy/rbd/sources/__init__.py @@ -6,7 +6,7 @@ import backy.revision if TYPE_CHECKING: - from backy.rbd import RbdRepository + from backy.rbd import RbdSource from backy.rbd.chunked import ChunkedFileBackend @@ -32,7 +32,7 @@ def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): class BackySourceFactory(ABC): @abstractmethod def __init__( - self, config: dict, repository: "RbdRepository", log: BoundLogger + self, config: dict, repository: "RbdSource", log: BoundLogger ) -> None: ... diff --git a/src/backy/rbd/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py index 514e3b48..e3150a24 100644 --- a/src/backy/rbd/sources/ceph/source.py +++ b/src/backy/rbd/sources/ceph/source.py @@ -5,7 +5,7 @@ import backy.utils from backy.revision import Revision -from ... import RbdRepository +from ... import RbdSource from ...chunked import ChunkedFileBackend from ...quarantine import QuarantineReport from .. 
import BackySource, BackySourceContext, BackySourceFactory @@ -25,11 +25,9 @@ class CephRBD(BackySource, BackySourceFactory, BackySourceContext): log: BoundLogger rbd: RBDClient revision: Revision - repository: RbdRepository + repository: RbdSource - def __init__( - self, config: dict, repository: RbdRepository, log: BoundLogger - ): + def __init__(self, config: dict, repository: RbdSource, log: BoundLogger): self.pool = config["pool"] self.image = config["image"] self.always_full = config.get("full-always", False) diff --git a/src/backy/rbd/sources/file.py b/src/backy/rbd/sources/file.py index f8295889..48631b1c 100644 --- a/src/backy/rbd/sources/file.py +++ b/src/backy/rbd/sources/file.py @@ -1,7 +1,7 @@ from structlog.stdlib import BoundLogger import backy.rbd.chunked -from backy.rbd import RbdRepository +from backy.rbd import RbdSource from backy.rbd.quarantine import QuarantineReport from backy.rbd.sources import ( BackySource, @@ -16,12 +16,10 @@ class File(BackySource, BackySourceFactory, BackySourceContext): filename: str cow: bool revision: Revision - repository: RbdRepository + repository: RbdSource log: BoundLogger - def __init__( - self, config: dict, repository: RbdRepository, log: BoundLogger - ): + def __init__(self, config: dict, repository: RbdSource, log: BoundLogger): self.repository = repository self.filename = config["filename"] self.cow = config.get("cow", True) diff --git a/src/backy/rbd/sources/flyingcircus/source.py b/src/backy/rbd/sources/flyingcircus/source.py index 27ec39d3..daf9daea 100644 --- a/src/backy/rbd/sources/flyingcircus/source.py +++ b/src/backy/rbd/sources/flyingcircus/source.py @@ -7,14 +7,14 @@ from backy.utils import TimeOut, TimeOutError -from ... import RbdRepository +from ... import RbdSource from ..ceph.source import CephRBD class FlyingCircusRootDisk(CephRBD): snapshot_timeout = 90 - def __init__(self, config, repository: RbdRepository, log: BoundLogger): + def __init__(self, config, repository: RbdSource, log: BoundLogger): self.config = config self.vm = config["vm"] self.consul_acl_token = config.get("consul_acl_token") diff --git a/src/backy/rbd/tests/test_backy-rbd.py b/src/backy/rbd/tests/test_backy-rbd.py index 89d4b877..4946ef45 100644 --- a/src/backy/rbd/tests/test_backy-rbd.py +++ b/src/backy/rbd/tests/test_backy-rbd.py @@ -4,7 +4,7 @@ import pytest from backy.ext_deps import BACKY_RBD_CMD, BASH -from backy.rbd import RbdRepository +from backy.rbd import RbdSource from backy.rbd.conftest import create_rev from backy.revision import Revision from backy.tests import Ellipsis @@ -39,7 +39,7 @@ def test_smoketest_internal(tmp_path, log): % source1 ).encode("utf-8") ) - repository = RbdRepository(backup_dir, log) + repository = RbdSource(backup_dir, log) # Backup first state rev1 = create_rev(repository, {"manual:test"}) diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py index 797fa1c0..c04afcf0 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -98,7 +98,7 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): ) monkeypatch.setattr( - backy.rbd.RbdRepository, + backy.rbd.RbdSource, "backup", partialmethod(print_args, return_value=success), ) @@ -136,7 +136,7 @@ def test_call_unexpected_exception( def do_raise(*args, **kw): raise RuntimeError("test") - monkeypatch.setattr(backy.rbd.RbdRepository, "gc", do_raise) + monkeypatch.setattr(backy.rbd.RbdSource, "gc", do_raise) import os monkeypatch.setattr(os, "_exit", lambda x: None) diff --git 
a/src/backy/rbd/tests/test_source.py b/src/backy/rbd/tests/test_source.py index dad69f1e..ea638a23 100644 --- a/src/backy/rbd/tests/test_source.py +++ b/src/backy/rbd/tests/test_source.py @@ -1,4 +1,4 @@ -from backy.rbd import RbdRepository +from backy.rbd import RbdSource from backy.rbd.sources.ceph.source import CephRBD @@ -17,7 +17,7 @@ def test_configure_ceph_source(tmp_path, log): image: test04 """ ) - backup = RbdRepository(tmp_path, log) + backup = RbdSource(tmp_path, log) assert isinstance(backup.source, CephRBD) assert backup.source.pool == "test" assert backup.source.image == "test04" diff --git a/src/backy/repository.py b/src/backy/repository.py index 839ccef5..c8e0d481 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -10,7 +10,7 @@ import yaml from structlog.stdlib import BoundLogger -from backy.source import Source +from backy.source import Source, factory_by_type from backy.utils import ( duplicates, list_get, @@ -20,7 +20,6 @@ unique, ) -from .ext_deps import BACKY_RBD_CMD, BACKY_S3_CMD from .revision import Revision, Trust, filter_schedule_tags from .schedule import Schedule @@ -45,21 +44,6 @@ class StatusDict(TypedDict): local_revs: int -class RepositoryType(Enum): - rbd = BACKY_RBD_CMD - s3 = BACKY_S3_CMD - - @classmethod - def from_str(cls, str: str) -> "RepositoryType": - match str: - case "rbd": - return cls.rbd - case "s3": - return cls.s3 - case _: - raise ValueError("invalid str for RepositoryType: " + str) - - class Repository(object): """A repository stores and manages backups for a single source. @@ -83,7 +67,7 @@ class Repository(object): _by_uuid: dict[str, Revision] _lock_fds: dict[str, IO] - type_: RepositoryType + sourcetype: type[Source] def __init__(self, path: Path, log: BoundLogger): self.log = log.bind(subsystem="backup") @@ -105,29 +89,43 @@ def __init__(self, path: Path, log: BoundLogger): self.schedule = Schedule() self.schedule.configure(self.config["schedule"]) - self.type = RepositoryType.from_str(self.config.get("type", "rbd")) + self.sourcetype = factory_by_type( + self.config.get("sourcetype", "backy-rbd") + ) self.scan() @classmethod - def init(cls, path: Path, log: BoundLogger, source: type[Source]) -> "Repository": + def init( + cls, + path: Path, + source: type[Source], + sourceconf: dict, + log: BoundLogger, + ) -> "Repository": if (path / "config").exists(): raise RepositoryNotEmpty(path) if not path.exists(): path.mkdir(parents=True, exist_ok=True) - source_config = source.init(path, log) - - config = {"schedule": {}, "source": source_config} - with open(path / "config", "w") as f: - yaml.dump(config, f) + yaml.dump( + { + "schedule": {}, + "sourcetype": source.subcommand, + "source": sourceconf, + }, + f, + ) log.info(f"repo-initialized", path=path) return cls(path, log) + def get_source(self) -> Source: + return self.sourcetype(self, self.config["source"], self.log) + @property def problem_reports(self) -> list[str]: return [] diff --git a/src/backy/source.py b/src/backy/source.py index 9e090049..73825a40 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -1,17 +1,20 @@ +from abc import ABC, abstractmethod, abstractproperty from importlib.metadata import entry_points -from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any from structlog.stdlib import BoundLogger +if TYPE_CHECKING: + from backy.repository import Repository + SOURCE_PLUGINS = entry_points(group="backy.sources") -def factory_by_type(type_): +def factory_by_type(type_) -> type["Source"]: return 
SOURCE_PLUGINS[type_].load() -class Source: +class Source(ABC): """A source provides specific implementations for making and restoring backups. @@ -44,9 +47,15 @@ class Source: """ - type_: str - config: dict[str, Any] + repository: "Repository" + log: BoundLogger + + @abstractmethod + def __init__(self, repository: "Repository", log: BoundLogger): + self.repository = repository + self.log = log - @classmethod - def init(cls, repository: Path, log: BoundLogger) -> dict[str, Any]: - return {"type": cls.type_} + @property + @abstractmethod + def subcommand(self) -> str: + ... From de93cf4f8c298e9b9c1b47b27ee124ea3e4aa861 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Thu, 27 Jun 2024 16:32:35 +0200 Subject: [PATCH 17/25] snapshot: round off the repository/source bootstrap pattern start demonstrating the general workflow using the simplistic file source clean up old setuptools configs --- pyproject.toml | 6 ++ setup.cfg | 20 ------ setup.py | 109 ----------------------------- src/backy/cli/tests/test_client.py | 2 - src/backy/cli/tests/test_main.py | 1 - src/backy/file/__init__.py | 12 ++-- src/backy/file/tests/test_file.py | 45 +++++++++--- src/backy/repository.py | 75 ++++++-------------- src/backy/source.py | 21 ++++-- 9 files changed, 89 insertions(+), 202 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml index 6043ac54..9dac99a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,3 +90,9 @@ backy-file = "backy.file:main" [[tool.mypy.overrides]] module = "backy.*" check_untyped_defs = true + +[tool.zest-releaser] +prereleaser.before = [ "release_helper.ignore_history_file" ] +prereleaser.middle = [ "release_helper.update_poetry_version release_helper.scriv_collect" ] +postreleaser.before = [ "release_helper.ignore_history_file" ] +postreleaser.middle = [ "release_helper.update_poetry_version" ] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 97197c5e..00000000 --- a/setup.cfg +++ /dev/null @@ -1,20 +0,0 @@ -[upload_sphinx] -upload-dir = build/doc - -[yapf] -based_on_style = pep8 -column_limit = 79 -split_before_expression_after_opening_paren = true -split_before_closing_bracket = false -SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = false -BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true - -[flake8] -max-line-length = 80 -extend-ignore = E203 - -[zest.releaser] -prereleaser.before = release_helper.ignore_history_file -prereleaser.middle = release_helper.update_poetry_version release_helper.scriv_collect -postreleaser.before = release_helper.ignore_history_file -postreleaser.middle = release_helper.update_poetry_version diff --git a/setup.py b/setup.py deleted file mode 100644 index 14ffe3ba..00000000 --- a/setup.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Block-based backup and restore utility for virtual machine images""" - -import codecs -import glob -import os.path as p -import subprocess -import sys - -from setuptools import Command, find_packages, setup - - -class PyTest(Command): - """Invoke py.test from `bin/python setup.py test`.""" - - user_options = [] # type: ignore - - def initialize_options(self): - return None - - def finalize_options(self): - return None - - def run(self): - errno = subprocess.call( - [ - sys.executable, - p.join(p.dirname(__file__), "bin", "py.test"), - "-m1", - ] - ) - raise SystemExit(errno) - - -def open_project_path(filename): - fullname = p.join(p.dirname(__file__), filename) - return codecs.open(fullname, encoding="ascii") - - -def long_desc(): - parts = [] - for 
name in ("README.txt", "CHANGES.txt"): - with open_project_path(name) as f: - parts.append(f.read()) - return "\n".join(parts) - - -setup( - name="backy", - version="2.6.0.dev0", - install_requires=[ - "consulate", - "packaging", - "tzlocal", - "PyYaml", - "setuptools", - "shortuuid", - "python-lzo", - "humanize", - "mmh3", - "structlog", - "aiohttp", - "rich", - ], - extras_require={ - "test": [ - "pytest", - "pytest-asyncio", - "pytest-cache", - "pytest-cov", - "pytest-flake8", - "pytest-timeout", - ], - }, - author=( - "Christian Theune , " - "Christian Kauhaus , " - "Daniel Kraft " - ), - author_email="mail@flyingcircus.io", - license="GPL-3", - url="https://bitbucket.org/flyingcircus/backy", - keywords="backup", - classifiers="""\ -Development Status :: 5 - Production/Stable -Environment :: Console -Intended Audience :: System Administrators -License :: OSI Approved :: GNU General Public License v3 (GPLv3) -Operating System :: POSIX -Programming Language :: Python -Programming Language :: Python :: 3 -Programming Language :: Python :: 3.6 -Programming Language :: Python :: 3.7 -Programming Language :: Python :: 3.8 -Programming Language :: Python :: 3.9 -Topic :: System :: Archiving :: Backup -"""[ - :-1 - ].split( - "\n" - ), - description=__doc__.strip(), - long_description=long_desc(), - packages=find_packages("src"), - package_dir={"": "src"}, - include_package_data=True, - data_files=[("", glob.glob("*.txt"))], - zip_safe=False, - cmdclass={"test": PyTest}, -) diff --git a/src/backy/cli/tests/test_client.py b/src/backy/cli/tests/test_client.py index 470a8886..bd354541 100644 --- a/src/backy/cli/tests/test_client.py +++ b/src/backy/cli/tests/test_client.py @@ -6,9 +6,7 @@ from aiohttp.web_exceptions import HTTPUnauthorized from backy import utils -from backy.cli.client import CLIClient from backy.daemon.api import BackyAPI, Client -from backy.daemon.tests.test_daemon import daemon from backy.rbd.quarantine import QuarantineReport from backy.revision import Revision from backy.tests import Ellipsis diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index 4ed9823e..2746c992 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -11,7 +11,6 @@ from backy.revision import Revision from backy.tests import Ellipsis import backy.cli -import backy.cli.client @pytest.fixture def argv(): diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index 0e0adef3..a2970e51 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -18,12 +18,14 @@ class FileSource(Source): path: Path # the source we are backing up + type_ = "file" - def __init__( - self, repository: Repository, config: dict[str, Any], log: BoundLogger - ): - super().__init__(repository, log) - self.path = Path(config["path"]) + def __init__(self, path: Path): + self.path = path + + @classmethod + def from_config(cls, config: dict[str, Any]): + return cls(path=Path(config['path'])) @property def subcommand(self) -> str: diff --git a/src/backy/file/tests/test_file.py b/src/backy/file/tests/test_file.py index f31b78b8..babeb508 100644 --- a/src/backy/file/tests/test_file.py +++ b/src/backy/file/tests/test_file.py @@ -1,23 +1,52 @@ from typing import cast +import yaml from backy.file import FileSource from backy.repository import Repository from backy.revision import Revision +from backy.schedule import Schedule -def test_simple_cycle(tmp_path, log): +def test_bootstrap_from_api(tmp_path, log): + original = tmp_path / "original.txt" + + source 
= FileSource(original) + + schedule = Schedule() + + repo_path = tmp_path / "repository" + repository = Repository(repo_path, source, schedule, log) + + exercise_fresh_repo(repository) + + +def test_bootstrap_from_config(tmp_path, log): original = tmp_path / "original.txt" - with open(original, "w") as f: - f.write("This is the original file.") repo_path = tmp_path / "repository" - repository = Repository.init( - repo_path, FileSource, FileSource.to_config(repo_path), log - ) - source = cast(FileSource, repository.get_source()) + config = { + "path": repo_path, + "schedule": {}, + "source": {"type": "file", "path": str(original)}, + } + + repository = Repository.from_config(config, log) + + exercise_fresh_repo(repository) + + +def exercise_fresh_repo(repository: Repository): + source = cast(FileSource, repository.source) + + original = source.path + + repository.connect() + + with open(original, "w") as f: + f.write("This is the original file.") - revision = Revision.create(repository, {"test"}, log) + revision = Revision.create(repository, {"test"}, repository.log) source.backup(revision) with open(original, "w") as f: diff --git a/src/backy/repository.py b/src/backy/repository.py index c8e0d481..8c2a1add 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -4,13 +4,13 @@ from enum import Enum from math import ceil, floor from pathlib import Path -from typing import IO, List, Literal, Optional, TypedDict +from typing import IO, List, Literal, Optional, TypedDict, Any import tzlocal import yaml from structlog.stdlib import BoundLogger -from backy.source import Source, factory_by_type +import backy.source from backy.utils import ( duplicates, list_get, @@ -60,71 +60,42 @@ class Repository(object): """ path: Path - config: dict schedule: Schedule history: list[Revision] log: BoundLogger _by_uuid: dict[str, Revision] _lock_fds: dict[str, IO] - sourcetype: type[Source] + sourcetype: type[backy.source.Source] - def __init__(self, path: Path, log: BoundLogger): + def __init__( + self, + path: Path, + source: backy.source.Source, + schedule: Schedule, + log: BoundLogger, + ): + self.schedule = schedule + self.source = source + self.source.bind(self, log) self.log = log.bind(subsystem="backup") self.path = path.resolve() self._lock_fds = {} - # Load config from file - try: - with self.path.joinpath("config").open(encoding="utf-8") as f: - self.config = yaml.safe_load(f) - except IOError: - self.log.error( - "could-not-read-config", - _fmt_msg="Could not read config file. 
Is the path correct?", - config_path=str(self.path / "config"), - ) - raise - - self.schedule = Schedule() - self.schedule.configure(self.config["schedule"]) - - self.sourcetype = factory_by_type( - self.config.get("sourcetype", "backy-rbd") - ) - + def connect(self): + self.path.mkdir(parents=True, exist_ok=True) self.scan() - @classmethod - def init( - cls, - path: Path, - source: type[Source], - sourceconf: dict, - log: BoundLogger, - ) -> "Repository": - if (path / "config").exists(): - raise RepositoryNotEmpty(path) - - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - - with open(path / "config", "w") as f: - yaml.dump( - { - "schedule": {}, - "sourcetype": source.subcommand, - "source": sourceconf, - }, - f, - ) - - log.info(f"repo-initialized", path=path) + @staticmethod + def from_config(config: dict[str, Any], log: BoundLogger) -> "Repository": + schedule = Schedule() + schedule.configure(config["schedule"]) - return cls(path, log) + source = backy.source.factory_by_type( + config["source"]["type"] + ).from_config(config["source"]) - def get_source(self) -> Source: - return self.sourcetype(self, self.config["source"], self.log) + return Repository(config['path'], source, schedule, log) @property def problem_reports(self) -> list[str]: diff --git a/src/backy/source.py b/src/backy/source.py index 73825a40..14c0cdb2 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -1,4 +1,4 @@ -from abc import ABC, abstractmethod, abstractproperty +from abc import ABC, abstractmethod from importlib.metadata import entry_points from typing import TYPE_CHECKING, Any @@ -6,6 +6,7 @@ if TYPE_CHECKING: from backy.repository import Repository + from backy.revision import Revision SOURCE_PLUGINS = entry_points(group="backy.sources") @@ -47,15 +48,25 @@ class Source(ABC): """ + type_: str + repository: "Repository" log: BoundLogger - @abstractmethod - def __init__(self, repository: "Repository", log: BoundLogger): + def bind(self, repository: "Repository", log: BoundLogger): self.repository = repository self.log = log - @property + @classmethod + @abstractmethod + def from_config(cls, config: dict[str, Any]) -> "Source": + ... + + + @abstractmethod + def backup(self, revision: "Revision") -> "Source": + ... + @abstractmethod - def subcommand(self) -> str: + def restore(self, revision: "Revision", *args, **kw) -> "Source": ... 
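
The test above exercises this bootstrap in-process. For orientation, here is a
rough sketch of the same flow driven from a YAML file, as an external caller
might wire it up. The file name `backy.yaml` and the exact key layout are
assumptions modelled on `test_bootstrap_from_config`, not an interface this
patch pins down:

    # Sketch only: mirrors test_bootstrap_from_config with an on-disk config.
    from pathlib import Path

    import structlog
    import yaml

    from backy.repository import Repository

    log = structlog.stdlib.get_logger()

    # Assumed file shape (cf. the config dict in the test):
    #   path: /srv/backy/vm01
    #   schedule: {}
    #   source:
    #     type: file
    #     path: /var/lib/vm01/image
    with open("backy.yaml") as f:  # hypothetical file name
        config = yaml.safe_load(f)
    config["path"] = Path(config["path"])  # Repository resolves this as a Path

    repository = Repository.from_config(config, log)
    repository.connect()  # create the directory and scan existing revisions

The `source.type` key is the name that `factory_by_type` resolves through the
`backy.sources` entry point group, which is what keeps repositories decoupled
from concrete source implementations.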
From a7893b2bf38c21330fb97bc1712c97e6de4e64b7 Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Thu, 27 Jun 2024 23:47:24 +0200 Subject: [PATCH 18/25] snapshot: simplify backy-rbd --- src/backy/cli/tests/test_main.py | 3 +- src/backy/file/__init__.py | 11 +- src/backy/file/tests/test_file.py | 1 + src/backy/logging.py | 13 +- src/backy/rbd/chunked/__init__.py | 121 ------------- src/backy/rbd/chunked/chunk.py | 6 +- src/backy/rbd/chunked/file.py | 2 - src/backy/rbd/chunked/store.py | 2 - src/backy/rbd/rbdsource.py | 170 +++++++++++++----- src/backy/rbd/sources/__init__.py | 68 ------- src/backy/rbd/sources/ceph/source.py | 38 ++-- src/backy/rbd/sources/ceph/tests/__init__.py | 0 .../tests/test_fc_source.py | 0 src/backy/rbd/sources/file.py | 73 -------- src/backy/rbd/sources/flyingcircus/source.py | 5 +- src/backy/repository.py | 8 +- src/backy/revision.py | 21 +-- src/backy/source.py | 8 +- 18 files changed, 166 insertions(+), 384 deletions(-) delete mode 100644 src/backy/rbd/sources/__init__.py create mode 100644 src/backy/rbd/sources/ceph/tests/__init__.py rename src/backy/rbd/sources/{flyingcircus => ceph}/tests/test_fc_source.py (100%) delete mode 100644 src/backy/rbd/sources/file.py diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index 2746c992..cf25061a 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -6,11 +6,12 @@ import pytest +import backy.cli import backy.repository from backy import utils from backy.revision import Revision from backy.tests import Ellipsis -import backy.cli + @pytest.fixture def argv(): diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index a2970e51..0261a318 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -19,17 +19,16 @@ class FileSource(Source): path: Path # the source we are backing up type_ = "file" + subcommand = "backy-file" def __init__(self, path: Path): self.path = path @classmethod - def from_config(cls, config: dict[str, Any]): - return cls(path=Path(config['path'])) - - @property - def subcommand(self) -> str: - return "backy-file" + def from_config( + cls, config: dict[str, Any], log: BoundLogger + ) -> "FileSource": + return cls(path=Path(config["path"])) @staticmethod def to_config(path: Path) -> dict[str, Any]: diff --git a/src/backy/file/tests/test_file.py b/src/backy/file/tests/test_file.py index babeb508..19d41cb1 100644 --- a/src/backy/file/tests/test_file.py +++ b/src/backy/file/tests/test_file.py @@ -1,6 +1,7 @@ from typing import cast import yaml + from backy.file import FileSource from backy.repository import Repository from backy.revision import Revision diff --git a/src/backy/logging.py b/src/backy/logging.py index da36ff32..988ead79 100644 --- a/src/backy/logging.py +++ b/src/backy/logging.py @@ -118,9 +118,7 @@ def __getattr__(self, name): def prefix(prefix, line): - return "{}>\t".format(prefix) + line.replace( - "\n", "\n{}>\t".format(prefix) - ) + return "{}>\t".format(prefix) + line.replace("\n", "\n{}>\t".format(prefix)) class ConsoleFileRenderer: @@ -145,9 +143,7 @@ def __init__(self, min_level, pad_event=_EVENT_WIDTH): self.min_level = self.LEVELS.index(min_level.lower()) if colorama is None: print( - _MISSING.format( - who=self.__class__.__name__, package="colorama" - ) + _MISSING.format(who=self.__class__.__name__, package="colorama") ) if COLORIZED_TTY_OUTPUT: colorama.init() @@ -216,10 +212,7 @@ def write(line): level = event_dict.pop("level", None) if level is not None: write( - 
self._level_to_color[level] - + level[0].upper() - + RESET_ALL - + " " + self._level_to_color[level] + level[0].upper() + RESET_ALL + " " ) job_name = event_dict.pop("job_name", "-") diff --git a/src/backy/rbd/chunked/__init__.py b/src/backy/rbd/chunked/__init__.py index 446848f4..e69de29b 100644 --- a/src/backy/rbd/chunked/__init__.py +++ b/src/backy/rbd/chunked/__init__.py @@ -1,121 +0,0 @@ -from pathlib import Path -from typing import Optional, Set - -from structlog.stdlib import BoundLogger - -from backy.revision import Revision, Trust -from backy.utils import END, report_status - -from ...repository import Repository -from .chunk import Chunk, Hash -from .file import File -from .store import Store - - -class ChunkedFileBackend: - # multiple Backends may share the same store - STORES: dict[Path, Store] = dict() - repository: Repository - revision: Revision - store: Store - log: BoundLogger - - def __init__(self, revision: Revision, log: BoundLogger): - self.repository = revision.repository - self.revision = revision - path = self.repository.path / "chunks" - if path not in self.STORES: - self.STORES[path] = Store(self.repository.path / "chunks", log) - self.store = self.STORES[path] - self.log = log.bind(subsystem="chunked") - - def open(self, mode: str = "rb", parent: Optional[Revision] = None) -> File: # type: ignore[override] - if "w" in mode or "+" in mode: - if parent and not self.revision.filename.exists(): - with self.revision.filename.open( - "wb" - ) as new, parent.filename.open("rb") as old: - # This is ok, this is just metadata, not the actual data. - new.write(old.read()) - overlay = False - if mode == "o": - mode = "rw" - overlay = True - file = File(self.revision.filename, self.store, mode, overlay) - - if file.writable() and self.repository.contains_distrusted: - # "Force write"-mode if any revision is distrusted. - self.log.warn("forcing-full") - self.store.force_writes = True - - return file - - def purge(self) -> None: - self.log.debug("purge") - used_chunks: Set[Hash] = set() - for revision in self.repository.local_history: - used_chunks.update( - type(self)(revision, self.log).open()._mapping.values() - ) - self.store.purge(used_chunks) - - @report_status - def verify(self): - log = self.log.bind(revision_uuid=self.revision.uuid) - log.info("verify-start") - verified_chunks: Set[Hash] = set() - - # Load verified chunks to avoid duplicate work - for revision in self.repository.get_history(clean=True, local=True): - if revision.trust != Trust.VERIFIED: - continue - verified_chunks.update( - type(self)(revision, self.log).open()._mapping.values() - ) - - log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks)) - - errors = False - # Go through all chunks and check them. Delete problematic ones. - f = self.open() - hashes = set(f._mapping.values()) - verified_chunks - yield len(hashes) + 2 - for candidate in hashes: - yield - if candidate in verified_chunks: - continue - try: - c = Chunk(self.store, candidate) - c._read_existing() - except Exception: - log.exception("verify-error", chunk=candidate) - errors = True - if self.store.chunk_path(candidate).exists(): - try: - self.store.chunk_path(candidate).unlink() - except Exception: - log.exception("verify-remove-error", chunk=candidate) - # This is an optimisation: we can skip this revision, purge it - # and then keep verifying other chunks. This avoids checking - # things unnecessarily in duplicate. - # And we only mark it as verified if we never saw any problems. 
- break - - yield - - if errors: - # Found any issues? Delete this revision as we can't trust it. - self.revision.remove() - else: - # No problems found - mark as verified. - self.revision.verify() - self.revision.write_info() - - yield - - # Purge to ensure that we don't leave unused, potentially untrusted - # stuff around, especially if this was the last revision. - self.purge() - - yield END - yield None diff --git a/src/backy/rbd/chunked/chunk.py b/src/backy/rbd/chunked/chunk.py index 03fa9b4d..74d57861 100644 --- a/src/backy/rbd/chunked/chunk.py +++ b/src/backy/rbd/chunked/chunk.py @@ -132,11 +132,8 @@ def flush(self) -> Optional[Hash]: # use a faster path to get the data. self.hash = hash(self.data.getvalue()) target = self.store.chunk_path(self.hash) - needs_forced_write = ( - self.store.force_writes and self.hash not in self.store.seen_forced - ) if self.hash not in self.store.seen: - if needs_forced_write or not target.exists(): + if self.store.force_writes or not target.exists(): # Create the tempfile in the right directory to increase locality # of our change - avoid renaming between multiple directories to # reduce traffic on the directory nodes. @@ -149,7 +146,6 @@ def flush(self) -> Optional[Hash]: # metadata flushes and then changing metadata again. os.chmod(tmpfile_name, 0o440) os.rename(tmpfile_name, target) - self.store.seen_forced.add(self.hash) self.store.seen.add(self.hash) self.clean = True return self.hash diff --git a/src/backy/rbd/chunked/file.py b/src/backy/rbd/chunked/file.py index 29f361bd..7dccd428 100644 --- a/src/backy/rbd/chunked/file.py +++ b/src/backy/rbd/chunked/file.py @@ -6,8 +6,6 @@ from collections import defaultdict from typing import TYPE_CHECKING, Optional, Tuple -import backy.rbd.chunked - from .chunk import Chunk, Hash if TYPE_CHECKING: diff --git a/src/backy/rbd/chunked/store.py b/src/backy/rbd/chunked/store.py index 50c94146..123236e2 100644 --- a/src/backy/rbd/chunked/store.py +++ b/src/backy/rbd/chunked/store.py @@ -22,13 +22,11 @@ class Store(object): force_writes = False path: Path - seen_forced: set[Hash] seen: set[Hash] log: BoundLogger def __init__(self, path: Path, log: BoundLogger): self.path = path - self.seen_forced = set() self.log = log.bind(subsystem="chunked-store") for x in range(256): subdir = self.path / f"{x:02x}" diff --git a/src/backy/rbd/rbdsource.py b/src/backy/rbd/rbdsource.py index 8f1eaba5..188b0b18 100644 --- a/src/backy/rbd/rbdsource.py +++ b/src/backy/rbd/rbdsource.py @@ -3,7 +3,7 @@ import time from enum import Enum from pathlib import Path -from typing import IO, Any +from typing import IO, Any, Optional, Set from structlog.stdlib import BoundLogger @@ -13,11 +13,12 @@ from ..repository import Repository from ..revision import Revision, Trust from ..source import Source -from ..utils import CHUNK_SIZE, copy, posix_fadvise -from .chunked import ChunkedFileBackend -from .chunked.chunk import BackendException +from ..utils import CHUNK_SIZE, END, copy, posix_fadvise, report_status +from .chunked.chunk import BackendException, Chunk, Hash +from .chunked.file import File +from .chunked.store import Store from .quarantine import QuarantineStore -from .sources import BackySourceFactory, select_source +from .sources.flyingcircus.source import FlyingCircusRootDisk # Locking strategy: # @@ -43,42 +44,49 @@ def __str__(self): class RbdSource(Source): - """A backup of a VM. 
+ type_ = "rbd" + subcommand = "backy-rbd" - Provides access to methods to - - - backup, restore, and list revisions - - """ - - source: BackySourceFactory + source: FlyingCircusRootDisk + store: Store quarantine: QuarantineStore - - def __init__(self, config: dict[str, Any], log: BoundLogger): - - # Initialize our source - try: - source_factory = select_source(self.config["source"]["type"]) - except IndexError: - self.log.error( - "source-type-unavailable", - _fmt_msg="No source type named `{type}` exists.", - type=self.config["source"]["type"], - ) - raise - self.source = source_factory(self.config["source"], self, self.log) - - assert self.config["source"].get("backend", "chunked") == "chunked" - - self.quarantine = QuarantineStore(self.path, self.log) - - @property - def subcommand(self) -> str: - return "backy-rbd" - - @property - def problem_reports(self): - return [f"{len(self.quarantine.report_ids)} quarantined blocks"] + log: BoundLogger + + def __init__(self, rbdsource: FlyingCircusRootDisk, log: BoundLogger): + self.source = rbdsource + self.log = log.bind(subsystem="rbdsource") + + @classmethod + def from_config(cls, config: dict[str, Any], log: BoundLogger) -> "Source": + assert config.get("backend", "chunked") == "chunked" + return cls(FlyingCircusRootDisk(config, log), log) + + def bind(self, repository: "Repository") -> None: + super().bind(repository) + # TODO: move quarantine to repo + self.quarantine = QuarantineStore(repository.path, self.log) + self.store = Store(repository.path / "chunks") + + def _path_for_revision(self, revision: Revision) -> Path: + return self.repository.path / revision.uuid + + def open( + self, revision: Revision, parent: Optional[Revision] = None + ) -> File: + if parent and not self._path_for_revision(revision).exists(): + with self._path_for_revision(revision).open( + "wb" + ) as new, self._path_for_revision(parent).open("rb") as old: + # This is ok, this is just metadata, not the actual data. + new.write(old.read()) + file = File(self._path_for_revision(revision), self.store) + + if file.writable() and self.repository.contains_distrusted: + # "Force write"-mode if any revision is distrusted. 
+            self.log.warn("forcing-full")
+            self.store.force_writes = True
+
+        return file
 
     #################
     # Making backups
@@ -98,9 +106,14 @@ def backup(self, revision: Revision) -> bool:
 
         with self.source(revision) as source:
             try:
-                backend = ChunkedFileBackend(revision, self.log)
-                source.backup(backend)
-                verified = source.verify(backend)
+                parent_rev = source.get_parent()
+                file = self.open(revision, parent_rev)
+                if parent_rev:
+                    source.diff(file, parent_rev)
+                else:
+                    source.full(file)
+                file = self.open(revision)
+                verified = source.verify(file, self.quarantine)
             except BackendException:
                 self.log.exception("backend-error-distrust-all")
                 verified = False
@@ -131,18 +144,81 @@ def backup(self, revision: Revision) -> bool:
             ):
                 if revision.trust == Trust.DISTRUSTED:
                     self.log.warning("inconsistent")
-                    backend = ChunkedFileBackend(revision, self.log)
-                    backend.verify()
+                    self.verify(revision, skip_lock=True)
                     break
         return verified
 
     @Repository.locked(target=".purge", mode="shared")
-    def verify(self, revision: Revision) -> None:
-        ChunkedFileBackend(revision, self.log).verify()
+    @report_status
+    def verify(self, revision: Revision):
+        log = self.log.bind(revision_uuid=revision.uuid)
+        log.info("verify-start")
+        verified_chunks: Set[Hash] = set()
+
+        # Load verified chunks to avoid duplicate work
+        for verified_revision in self.repository.get_history(
+            clean=True, local=True
+        ):
+            if verified_revision.trust != Trust.VERIFIED:
+                continue
+            verified_chunks.update(
+                self.open(verified_revision)._mapping.values()
+            )
+
+        log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks))
+
+        errors = False
+        # Go through all chunks and check them. Delete problematic ones.
+        f = self.open(revision)
+        hashes = set(f._mapping.values()) - verified_chunks
+        yield len(hashes) + 2
+        for candidate in hashes:
+            yield
+            if candidate in verified_chunks:
+                continue
+            try:
+                c = Chunk(self.store, candidate)
+                c._read_existing()
+            except Exception:
+                log.exception("verify-error", chunk=candidate)
+                errors = True
+                try:
+                    self.store.chunk_path(candidate).unlink(missing_ok=True)
+                except Exception:
+                    log.exception("verify-remove-error", chunk=candidate)
+                # This is an optimisation: we can skip this revision, purge it
+                # and then keep verifying other chunks. This avoids checking
+                # things unnecessarily in duplicate.
+                # And we only mark it as verified if we never saw any problems.
+                break
+
+        yield
+
+        # TODO: move this to cli/daemon?
+        if errors:
+            # Found any issues? Delete this revision as we can't trust it.
+            revision.remove()
+        else:
+            # No problems found - mark as verified.
+            revision.verify()
+            revision.write_info()
+
+        yield
+
+        # Purge to ensure that we don't leave unused, potentially untrusted
+        # stuff around, especially if this was the last revision.
+        self.gc(skip_lock=True)
+
+        yield END
+        yield None
 
     @Repository.locked(target=".purge", mode="exclusive")
     def gc(self) -> None:
-        ChunkedFileBackend(self.repository.local_history[-1], self.log).purge()
+        self.log.debug("purge")
+        used_chunks: Set[Hash] = set()
+        for revision in self.repository.local_history:
+            used_chunks.update(self.open(revision)._mapping.values())
+        self.store.purge(used_chunks)
         # TODO: move this to cli/daemon?
self.repository.clear_purge_pending() diff --git a/src/backy/rbd/sources/__init__.py b/src/backy/rbd/sources/__init__.py deleted file mode 100644 index 28b0327d..00000000 --- a/src/backy/rbd/sources/__init__.py +++ /dev/null @@ -1,68 +0,0 @@ -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Type - -from structlog.stdlib import BoundLogger - -import backy.revision - -if TYPE_CHECKING: - from backy.rbd import RbdSource - from backy.rbd.chunked import ChunkedFileBackend - - -class BackySource(ABC): - @abstractmethod - def backup(self, target: "ChunkedFileBackend") -> None: - ... - - @abstractmethod - def verify(self, target: "ChunkedFileBackend") -> bool: - ... - - -class BackySourceContext(ABC): - @abstractmethod - def __enter__(self) -> BackySource: - ... - - def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): - pass - - -class BackySourceFactory(ABC): - @abstractmethod - def __init__( - self, config: dict, repository: "RbdSource", log: BoundLogger - ) -> None: - ... - - @abstractmethod - def __call__( - self, revision: "backy.revision.Revision" - ) -> BackySourceContext: - ... - - @abstractmethod - def ready(self) -> bool: - """Check whether the source can be backed up.""" - ... - - -def select_source(type_: str) -> Type[BackySourceFactory]: - match type_: - case "flyingcircus": - from backy.rbd.sources.flyingcircus.source import ( - FlyingCircusRootDisk, - ) - - return FlyingCircusRootDisk - case "ceph-rbd": - from backy.rbd.sources.ceph.source import CephRBD - - return CephRBD - case "file": - from backy.rbd.sources.file import File - - return File - case _: - raise ValueError(f"invalid backend: {type_}") diff --git a/src/backy/rbd/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py index e3150a24..4cdca55b 100644 --- a/src/backy/rbd/sources/ceph/source.py +++ b/src/backy/rbd/sources/ceph/source.py @@ -1,18 +1,17 @@ import time +from typing import Optional from structlog.stdlib import BoundLogger import backy.utils from backy.revision import Revision -from ... import RbdSource -from ...chunked import ChunkedFileBackend -from ...quarantine import QuarantineReport -from .. import BackySource, BackySourceContext, BackySourceFactory +from ...chunked.file import File +from ...quarantine import QuarantineReport, QuarantineStore from .rbd import RBDClient -class CephRBD(BackySource, BackySourceFactory, BackySourceContext): +class CephRBD: """The Ceph RBD source. Manages snapshots corresponding to revisions and provides a verification @@ -25,15 +24,13 @@ class CephRBD(BackySource, BackySourceFactory, BackySourceContext): log: BoundLogger rbd: RBDClient revision: Revision - repository: RbdSource - def __init__(self, config: dict, repository: RbdSource, log: BoundLogger): + def __init__(self, config: dict, log: BoundLogger): self.pool = config["pool"] self.image = config["image"] self.always_full = config.get("full-always", False) self.log = log.bind(subsystem="ceph") self.rbd = RBDClient(self.log) - self.repository = repository def ready(self) -> bool: """Check whether the source can be backed up. 
@@ -70,18 +67,16 @@ def _image_name(self) -> str: def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): self._delete_old_snapshots() - def backup(self, target: ChunkedFileBackend) -> None: + def get_parent(self) -> Optional[Revision]: if self.always_full: self.log.info("backup-always-full") - self.full(target) - return + return None revision = self.revision while True: parent = revision.get_parent() if not parent: self.log.info("backup-no-valid-parent") - self.full(target) - return + return None if not self.rbd.exists(self._image_name + "@backy-" + parent.uuid): self.log.info( "ignoring-rev-without-snapshot", @@ -90,15 +85,14 @@ def backup(self, target: ChunkedFileBackend) -> None: revision = parent continue # Ok, it's trusted and we have a snapshot. Let's do a diff. - break - self.diff(target, parent) + return parent - def diff(self, target: ChunkedFileBackend, parent: Revision) -> None: + def diff(self, target: "RbdSource", parent: Revision) -> None: self.log.info("diff") snap_from = "backy-" + parent.uuid snap_to = "backy-" + self.revision.uuid s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) - with s as source, target.open("r+b", parent) as target_: + with s as source, target.open(self.revision, parent) as target_: bytes = source.integrate(target_, snap_from, snap_to) self.log.info("diff-integration-finished") @@ -109,13 +103,13 @@ def diff(self, target: ChunkedFileBackend, parent: Revision) -> None: self.revision.stats["chunk_stats"] = chunk_stats - def full(self, target: ChunkedFileBackend) -> None: + def full(self, target: File) -> None: self.log.info("full") s = self.rbd.export( "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) ) copied = 0 - with s as source, target.open("r+b") as target_: + with s as source, target as target_: while True: buf = source.read(4 * backy.utils.MiB) if not buf: @@ -129,18 +123,18 @@ def full(self, target: ChunkedFileBackend) -> None: self.revision.stats["chunk_stats"] = chunk_stats - def verify(self, target: ChunkedFileBackend) -> bool: + def verify(self, target: File, quarantine: QuarantineStore) -> bool: s = self.rbd.image_reader( "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) ) self.revision.stats["ceph-verification"] = "partial" - with s as source, target.open("rb") as target_: + with s as source, target as target_: self.log.info("verify") return backy.utils.files_are_roughly_equal( source, target_, - report=lambda s, t, o: self.repository.quarantine.add_report( + report=lambda s, t, o: quarantine.add_report( QuarantineReport(s, t, o) ), ) diff --git a/src/backy/rbd/sources/ceph/tests/__init__.py b/src/backy/rbd/sources/ceph/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py b/src/backy/rbd/sources/ceph/tests/test_fc_source.py similarity index 100% rename from src/backy/rbd/sources/flyingcircus/tests/test_fc_source.py rename to src/backy/rbd/sources/ceph/tests/test_fc_source.py diff --git a/src/backy/rbd/sources/file.py b/src/backy/rbd/sources/file.py deleted file mode 100644 index 48631b1c..00000000 --- a/src/backy/rbd/sources/file.py +++ /dev/null @@ -1,73 +0,0 @@ -from structlog.stdlib import BoundLogger - -import backy.rbd.chunked -from backy.rbd import RbdSource -from backy.rbd.quarantine import QuarantineReport -from backy.rbd.sources import ( - BackySource, - BackySourceContext, - BackySourceFactory, -) -from backy.revision import Revision -from backy.utils import copy, copy_overwrite, 
files_are_equal - - -class File(BackySource, BackySourceFactory, BackySourceContext): - filename: str - cow: bool - revision: Revision - repository: RbdSource - log: BoundLogger - - def __init__(self, config: dict, repository: RbdSource, log: BoundLogger): - self.repository = repository - self.filename = config["filename"] - self.cow = config.get("cow", True) - self.log = log.bind(filename=self.filename, subsystem="file") - - def __call__(self, revision: Revision): - self.revision = revision - self.log = self.log.bind(revision_uuid=revision.uuid) - return self - - def __enter__(self): - return self - - def ready(self) -> bool: - """Check whether the source can be backed up. - - For files this means the file exists and is readable. - - """ - try: - with open(self.filename, "rb"): - pass - except Exception: - return False - return True - - def backup(self, target: "backy.rbd.chunked.ChunkedFileBackend") -> None: - self.log.debug("backup") - s = open(self.filename, "rb") - parent = self.revision.get_parent() - with s as source, target.open("r+b", parent) as target_: - if self.cow and parent: - self.log.info("backup-sparse") - bytes = copy_overwrite(source, target_) - else: - self.log.info("backup-full") - bytes = copy(source, target_) - - self.revision.stats["bytes_written"] = bytes - - def verify(self, target: "backy.rbd.chunked.ChunkedFileBackend") -> bool: - self.log.info("verify") - s = open(self.filename, "rb") - with s as source, target.open("rb") as target_: - return files_are_equal( - source, - target_, - report=lambda s, t, o: self.repository.quarantine.add_report( - QuarantineReport(s, t, o) - ), - ) diff --git a/src/backy/rbd/sources/flyingcircus/source.py b/src/backy/rbd/sources/flyingcircus/source.py index daf9daea..5dc681f3 100644 --- a/src/backy/rbd/sources/flyingcircus/source.py +++ b/src/backy/rbd/sources/flyingcircus/source.py @@ -14,11 +14,10 @@ class FlyingCircusRootDisk(CephRBD): snapshot_timeout = 90 - def __init__(self, config, repository: RbdSource, log: BoundLogger): - self.config = config + def __init__(self, config, log: BoundLogger): + super(FlyingCircusRootDisk, self).__init__(config, log) self.vm = config["vm"] self.consul_acl_token = config.get("consul_acl_token") - super(FlyingCircusRootDisk, self).__init__(config, repository, log) self.log = self.log.bind(vm=self.vm, subsystem="fc-disk") def create_snapshot(self, name: str) -> None: diff --git a/src/backy/repository.py b/src/backy/repository.py index 8c2a1add..2d3cb0c6 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -4,7 +4,7 @@ from enum import Enum from math import ceil, floor from pathlib import Path -from typing import IO, List, Literal, Optional, TypedDict, Any +from typing import IO, Any, List, Literal, Optional, TypedDict import tzlocal import yaml @@ -77,7 +77,6 @@ def __init__( ): self.schedule = schedule self.source = source - self.source.bind(self, log) self.log = log.bind(subsystem="backup") self.path = path.resolve() self._lock_fds = {} @@ -85,6 +84,7 @@ def __init__( def connect(self): self.path.mkdir(parents=True, exist_ok=True) self.scan() + self.source.bind(self) @staticmethod def from_config(config: dict[str, Any], log: BoundLogger) -> "Repository": @@ -93,9 +93,9 @@ def from_config(config: dict[str, Any], log: BoundLogger) -> "Repository": source = backy.source.factory_by_type( config["source"]["type"] - ).from_config(config["source"]) + ).from_config(config["source"], log) - return Repository(config['path'], source, schedule, log) + return Repository(config["path"], 
source, schedule, log) @property def problem_reports(self) -> list[str]: diff --git a/src/backy/revision.py b/src/backy/revision.py index ef31a338..ba531a64 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -94,15 +94,11 @@ def from_dict(cls, metadata, backup, log): r.trust = Trust(metadata.get("trust", Trust.TRUSTED.value)) return r - @property - def filename(self) -> Path: - """Full pathname of the image file.""" - return self.repository.path / self.uuid - @property def info_filename(self) -> Path: """Full pathname of the metadata file.""" - return self.filename.with_suffix(self.filename.suffix + ".rev") + p = self.repository.path / self.uuid + return p.with_suffix(p.suffix + ".rev") def materialize(self) -> None: self.write_info() @@ -150,24 +146,19 @@ def remove(self, force=False) -> None: self.tags = set() self.write_info() else: - for filename in self.filename.parent.glob(self.filename.name + "*"): - if filename.exists(): - self.log.debug("remove-start", filename=filename) - filename.unlink() - self.log.debug("remove-end", filename=filename) + if self.info_filename.exists(): + self.log.debug("remove-start", filename=self.info_filename) + self.info_filename.unlink() + self.log.debug("remove-end", filename=self.info_filename) if self in self.repository.history: self.repository.history.remove(self) del self.repository._by_uuid[self.uuid] def writable(self) -> None: - if self.filename.exists(): - self.filename.chmod(0o640) self.info_filename.chmod(0o640) def readonly(self) -> None: - if self.filename.exists(): - self.filename.chmod(0o440) self.info_filename.chmod(0o440) def get_parent(self, ignore_trust=False) -> Optional["Revision"]: diff --git a/src/backy/source.py b/src/backy/source.py index 14c0cdb2..9c45f3dd 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -49,20 +49,18 @@ class Source(ABC): """ type_: str + subcommand: str repository: "Repository" - log: BoundLogger - def bind(self, repository: "Repository", log: BoundLogger): + def bind(self, repository: "Repository") -> None: self.repository = repository - self.log = log @classmethod @abstractmethod - def from_config(cls, config: dict[str, Any]) -> "Source": + def from_config(cls, config: dict[str, Any], log: BoundLogger) -> "Source": ... - @abstractmethod def backup(self, revision: "Revision") -> "Source": ... 
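
The contract change in this patch is easy to miss in the diff noise: `Source.from_config()` now receives the logger explicitly, and the repository/source binding moves out of `Repository.__init__()` into `Repository.connect()`. A minimal sketch of a plugin conforming to the new contract follows; the `NullSource` name, its `subcommand`, and its empty config are hypothetical, for illustration only, not part of this series:

    from typing import Any

    from structlog.stdlib import BoundLogger

    from backy.revision import Revision
    from backy.source import Source


    class NullSource(Source):
        """Toy source illustrating the post-patch plugin contract."""

        type_ = "null"
        subcommand = "backy-null"  # hypothetical CLI entry point

        def __init__(self, log: BoundLogger):
            self.log = log.bind(subsystem="null-source")

        @classmethod
        def from_config(
            cls, config: dict[str, Any], log: BoundLogger
        ) -> "NullSource":
            # `config` is the `source:` mapping of the repository config.
            # The log is now handed in by Repository.from_config() instead
            # of being injected later via bind().
            return cls(log)

        def backup(self, revision: Revision) -> "NullSource":
            # self.repository is available only after Repository.connect()
            # has run, since connect() now calls source.bind(repository).
            self.log.info("null-backup", revision_uuid=revision.uuid)
            return self

        def restore(self, revision: Revision, *args, **kw) -> "NullSource":
            return self

Deferring `bind()` to `connect()` keeps construction side-effect free: a `Repository` can be built from configuration without touching the filesystem or the source, and `connect()` then creates the directory, scans the history, and binds the source in one step.
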
From d580c5074f144ab78714741e177ee4585ba252ce Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Fri, 28 Jun 2024 14:57:33 +0200 Subject: [PATCH 19/25] snapshot: further simplify the rbd source --- src/backy/rbd/chunked/__init__.py | 26 ++ src/backy/rbd/chunked/chunk.py | 24 +- src/backy/rbd/chunked/file.py | 4 +- src/backy/rbd/quarantine.py | 4 +- src/backy/rbd/rbd.py | 328 ++++++++++++++++++ src/backy/rbd/{rbdsource.py => source.py} | 303 +++++++++++++--- src/backy/rbd/sources/ceph/__init__.py | 14 - src/backy/rbd/sources/ceph/diff.py | 157 --------- src/backy/rbd/sources/ceph/rbd.py | 165 --------- src/backy/rbd/sources/ceph/source.py | 172 --------- src/backy/rbd/sources/ceph/tests/__init__.py | 0 .../rbd/sources/flyingcircus/__init__.py | 1 - src/backy/rbd/sources/flyingcircus/source.py | 80 ----- .../rbd/{sources/ceph => }/tests/conftest.py | 0 .../{sources/ceph => }/tests/nodata.rbddiff | Bin .../ceph => }/tests/test_ceph_source.py | 0 .../rbd/{sources/ceph => }/tests/test_diff.py | 0 .../ceph => }/tests/test_fc_source.py | 0 .../rbd/{sources/ceph => }/tests/test_rbd.py | 0 src/backy/repository.py | 13 + src/backy/source.py | 9 +- 21 files changed, 648 insertions(+), 652 deletions(-) create mode 100644 src/backy/rbd/rbd.py rename src/backy/rbd/{rbdsource.py => source.py} (51%) delete mode 100644 src/backy/rbd/sources/ceph/__init__.py delete mode 100644 src/backy/rbd/sources/ceph/diff.py delete mode 100644 src/backy/rbd/sources/ceph/rbd.py delete mode 100644 src/backy/rbd/sources/ceph/source.py delete mode 100644 src/backy/rbd/sources/ceph/tests/__init__.py delete mode 100644 src/backy/rbd/sources/flyingcircus/__init__.py delete mode 100644 src/backy/rbd/sources/flyingcircus/source.py rename src/backy/rbd/{sources/ceph => }/tests/conftest.py (100%) rename src/backy/rbd/{sources/ceph => }/tests/nodata.rbddiff (100%) rename src/backy/rbd/{sources/ceph => }/tests/test_ceph_source.py (100%) rename src/backy/rbd/{sources/ceph => }/tests/test_diff.py (100%) rename src/backy/rbd/{sources/ceph => }/tests/test_fc_source.py (100%) rename src/backy/rbd/{sources/ceph => }/tests/test_rbd.py (100%) diff --git a/src/backy/rbd/chunked/__init__.py b/src/backy/rbd/chunked/__init__.py index e69de29b..5ad5a0fc 100644 --- a/src/backy/rbd/chunked/__init__.py +++ b/src/backy/rbd/chunked/__init__.py @@ -0,0 +1,26 @@ +from typing import TypeAlias + +from .chunk import Chunk +from .file import File +from .store import Store + +__all__ = [ + "Chunk", + "File", + "Store", + "Hash", + "BackendException", + "InconsistentHash", +] + +Hash: TypeAlias = str + + +class BackendException(IOError): + pass + + +class InconsistentHash(BackendException): + def __init__(self, expected, actual): + self.expected = expected + self.actual = actual diff --git a/src/backy/rbd/chunked/chunk.py b/src/backy/rbd/chunked/chunk.py index 74d57861..b289513b 100644 --- a/src/backy/rbd/chunked/chunk.py +++ b/src/backy/rbd/chunked/chunk.py @@ -2,18 +2,18 @@ import io import os import tempfile -from typing import TYPE_CHECKING, Optional, Tuple, TypeAlias +from typing import TYPE_CHECKING, Optional, Tuple import lzo import mmh3 -import backy.rbd.chunked from backy.utils import posix_fadvise +from . 
import BackendException, Hash, InconsistentHash + if TYPE_CHECKING: - from backy.rbd.chunked import Store + from .store import Store -Hash: TypeAlias = str chunk_stats = { "write_full": 0, @@ -21,16 +21,6 @@ } -class BackendException(IOError): - pass - - -class InconsistentHash(BackendException): - def __init__(self, expected, actual): - self.expected = expected - self.actual = actual - - class Chunk(object): """A chunk in a file that represents a part of it. @@ -134,9 +124,9 @@ def flush(self) -> Optional[Hash]: target = self.store.chunk_path(self.hash) if self.hash not in self.store.seen: if self.store.force_writes or not target.exists(): - # Create the tempfile in the right directory to increase locality - # of our change - avoid renaming between multiple directories to - # reduce traffic on the directory nodes. + # Create the tempfile in the right directory to increase + # locality of our change - avoid renaming between multiple + # directories to reduce traffic on the directory nodes. fd, tmpfile_name = tempfile.mkstemp(dir=target.parent) posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore with os.fdopen(fd, mode="wb") as f: diff --git a/src/backy/rbd/chunked/file.py b/src/backy/rbd/chunked/file.py index 7dccd428..f414dd28 100644 --- a/src/backy/rbd/chunked/file.py +++ b/src/backy/rbd/chunked/file.py @@ -163,7 +163,9 @@ def seek(self, offset: int, whence=io.SEEK_SET) -> int: elif whence == io.SEEK_CUR: position = position + offset else: - raise ValueError("`whence` does not support mode {}".format(whence)) + raise ValueError( + "`whence` does not support mode {}".format(whence) + ) if position < 0: raise ValueError("Can not seek before the beginning of a file.") diff --git a/src/backy/rbd/quarantine.py b/src/backy/rbd/quarantine.py index 9f22c5a4..adecf63c 100644 --- a/src/backy/rbd/quarantine.py +++ b/src/backy/rbd/quarantine.py @@ -89,7 +89,9 @@ def representer(dumper, data): with SafeFile(path, encoding="utf-8") as f: f.open_new("wb") - yaml.dump(report.to_dict(), f, sort_keys=False, Dumper=CustomDumper) + yaml.dump( + report.to_dict(), f, sort_keys=False, Dumper=CustomDumper + ) def _store_chunk(self, chunk: bytes, hash: str) -> None: self.log.debug("store-chunk", hash=hash) diff --git a/src/backy/rbd/rbd.py b/src/backy/rbd/rbd.py new file mode 100644 index 00000000..9b99796e --- /dev/null +++ b/src/backy/rbd/rbd.py @@ -0,0 +1,328 @@ +import contextlib +import json +import struct +import subprocess +from collections import namedtuple +from typing import IO, BinaryIO, Iterator, Optional + +from structlog.stdlib import BoundLogger + +from backy.ext_deps import RBD +from backy.utils import CHUNK_SIZE, punch_hole + + +def detect_whole_object_support(): + result = run( + ["rbd", "help", "export-diff"], stdout=PIPE, stderr=PIPE, check=True + ) + return "--whole-object" in result.stdout.decode("ascii") + + +try: + CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = detect_whole_object_support() +except Exception: + CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = False + + +class RBDClient(object): + log: BoundLogger + + def __init__(self, log: BoundLogger): + self.log = log.bind(subsystem="rbd") + + def _ceph_cli(self, cmdline, encoding="utf-8") -> str: + # This wrapper function for the `rbd` command is only used for getting + # and interpreting text messages, making this the correct level for + # managing text encoding. Other use cases where binary data is piped + # to rbd have their own dedicated wrappers. 
+ return subprocess.check_output( + cmdline, encoding=encoding, errors="replace" + ) + + def _rbd(self, cmd, format=None): + cmd = filter(None, cmd) + rbd = [RBD] + + rbd.extend(cmd) + + if format == "json": + rbd.append("--format=json") + + self.log.debug("executing-command", command=" ".join(rbd)) + result = self._ceph_cli(rbd) + + self.log.debug("executed-command", stdout=result) + if format == "json": + result = json.loads(result) + + return result + + def exists(self, snapspec: str): + try: + return self._rbd(["info", snapspec], format="json") + except subprocess.CalledProcessError as e: + if e.returncode == 2: + return False + raise + + def map(self, image: str, readonly=False): + def parse_mappings_pre_nautilus(mappings): + """The parser code for Ceph release Luminous and earlier.""" + for mapping in mappings.values(): + if image == "{pool}/{name}@{snap}".format(**mapping): + return mapping + raise RuntimeError("Map not found in mapping list.") + + def parse_mappings_since_nautilus(mappings): + """The parser code for Ceph release Nautilus and later.""" + for mapping in mappings: + if image == "{pool}/{name}@{snap}".format(**mapping): + return mapping + raise RuntimeError("Map not found in mapping list.") + + versionstring = self._rbd(["--version"]) + + self._rbd(["map", image, "--read-only" if readonly else ""]) + + mappings_raw = self._rbd(["showmapped"], format="json") + + if "nautilus" in versionstring: + mapping = parse_mappings_since_nautilus(mappings_raw) + elif "luminous" in versionstring: + mapping = parse_mappings_pre_nautilus(mappings_raw) + else: + # our jewel build provides no version info + # this will break with releases newer than nautilus + mapping = parse_mappings_pre_nautilus(mappings_raw) + + def scrub_mapping(mapping): + SPEC = {"pool", "name", "snap", "device"} + # Ensure all specced keys exist + for key in SPEC: + if key not in mapping: + raise KeyError( + f"Missing key `{key}` in mapping {mapping!r}" + ) + # Scrub all non-specced keys + for key in list(mapping): + if key not in SPEC: + del mapping[key] + return mapping + + return scrub_mapping(mapping) + + def unmap(self, device): + self._rbd(["unmap", device]) + + def snap_create(self, image): + self._rbd(["snap", "create", image]) + + def snap_ls(self, image): + return self._rbd(["snap", "ls", image], format="json") + + def snap_rm(self, image): + return self._rbd(["snap", "rm", image]) + + @contextlib.contextmanager + def export_diff(self, new: str, old: str) -> Iterator["RBDDiffV1"]: + self.log.info("export-diff") + if CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF: + EXPORT_WHOLE_OBJECT = ["--whole-object"] + else: + EXPORT_WHOLE_OBJECT = [] + proc = subprocess.Popen( + [RBD, "export-diff", new, "--from-snap", old] + + EXPORT_WHOLE_OBJECT + + ["-"], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + # Have a rather largish buffer size, so rbd has some room to + # push its data to, when we are busy writing. 
+ bufsize=8 * CHUNK_SIZE, + ) + assert proc.stdout is not None + try: + yield RBDDiffV1(proc.stdout) + finally: + proc.stdout.close() + proc.wait() + + @contextlib.contextmanager + def image_reader(self, image: str) -> Iterator[BinaryIO]: + mapped = self.map(image, readonly=True) + source = open(mapped["device"], "rb", buffering=CHUNK_SIZE) + try: + yield source + finally: + source.close() + self.unmap(mapped["device"]) + + @contextlib.contextmanager + def export(self, image: str) -> Iterator[IO]: + self.log.info("export") + proc = subprocess.Popen( + [RBD, "export", image, "-"], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + # Have a rather largish buffer size, so rbd has some room to + # push its data to, when we are busy writing. + bufsize=4 * CHUNK_SIZE, + ) + assert proc.stdout is not None + try: + yield proc.stdout + finally: + proc.stdout.close() + proc.wait() + + +def unpack_from(fmt, f): + size = struct.calcsize(fmt) + b = f.read(size) + return struct.unpack(fmt, b) + + +Zero = namedtuple("Zero", ["start", "length"]) +Data = namedtuple("Data", ["start", "length", "stream"]) +SnapSize = namedtuple("SnapSize", ["size"]) +FromSnap = namedtuple("FromSnap", ["snapshot"]) +ToSnap = namedtuple("ToSnap", ["snapshot"]) + + +class RBDDiffV1(object): + f: IO + phase: str # header, metadata, data + record_type: Optional[str] + _streaming: bool + + header = b"rbd diff v1\n" + + def __init__(self, fh): + # self.filename = filename + self.f = fh + + self.phase = "header" + self.read_header() + self.record_type = None + self._streaming = False + + def read_header(self): + assert self.phase == "header" + header = self.f.read(len(self.header)) + if header != self.header: + raise ValueError("Unexpected header: {0!r}".format(header)) + self.phase = "metadata" + + def read_record(self): + if self.phase == "end": + return + assert not self._streaming, "Unread data from read_w. Consume first." + last_record_type = self.record_type + self.record_type = self.f.read(1).decode("ascii") + if self.record_type not in ["f", "t", "s", "w", "z", "e"]: + raise ValueError( + 'Got invalid record type "{}". Previous record: {}'.format( + self.record_type, last_record_type + ) + ) + method = getattr(self, "read_{}".format(self.record_type)) + return method() + + def read_fbytes(self, encoding=None): + length = unpack_from(" None: super().bind(repository) # TODO: move quarantine to repo self.quarantine = QuarantineStore(repository.path, self.log) - self.store = Store(repository.path / "chunks") + self.store = Store(repository.path / "chunks", self.log) def _path_for_revision(self, revision: Revision) -> Path: return self.repository.path / revision.uuid @@ -227,14 +229,9 @@ def gc(self) -> None: # This needs no locking as it's only a wrapper for restore_file and # restore_stdout and locking isn't re-entrant. 
- def restore( - self, - revision: Revision, - target: str, - restore_backend: RestoreBackend = RestoreBackend.AUTO, - ) -> None: - s = ChunkedFileBackend(revision, self.log).open("rb") - if restore_backend == RestoreBackend.AUTO: + def restore(self, revision: Revision, args: RestoreArgs) -> None: + s = self.open(revision) + if args.backend == RestoreBackend.AUTO: if self.backy_extract_supported(s): restore_backend = RestoreBackend.RUST else: @@ -242,12 +239,12 @@ def restore( self.log.info("restore-backend", backend=restore_backend.value) if restore_backend == RestoreBackend.PYTHON: with s as source: - if target != "-": - self.restore_file(source, target) + if args.target != "-": + self.restore_file(source, args.target) else: self.restore_stdout(source) elif restore_backend == RestoreBackend.RUST: - self.restore_backy_extract(revision, target) + self.restore_backy_extract(revision, args.target) def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: log = self.log.bind(subsystem="backy-extract") @@ -263,7 +260,7 @@ def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: if not version.startswith("backy-extract"): log.debug("unknown-version") return False - except: + except Exception: log.debug("unavailable") return False return True @@ -282,7 +279,8 @@ def restore_backy_extract(self, rev: Revision, target: str) -> None: ) if return_code: raise RuntimeError( - f"backy-extract failed with return code {return_code}. Maybe try `--backend python`?" + f"backy-extract failed with return code {return_code}. " + "Maybe try `--backend python`?" ) @Repository.locked(target=".purge", mode="shared") @@ -311,3 +309,226 @@ def restore_stdout(self, source: IO) -> None: if not chunk: break target.write(chunk) + + +class CephRBD: + """The Ceph RBD source. + + Manages snapshots corresponding to revisions and provides a verification + that tries to balance reliability and performance. + """ + + pool: str + image: str + always_full: bool + log: BoundLogger + rbd: RBDClient + revision: Revision + snapshot_timeout = 90 + + def __init__(self, config: dict, log: BoundLogger): + self.pool = config["pool"] + self.image = config["image"] + self.always_full = config.get("full-always", False) + self.log = log.bind(subsystem="ceph") + self.rbd = RBDClient(self.log) + self.vm = config["vm"] + self.consul_acl_token = config.get("consul_acl_token") + + def ready(self) -> bool: + """Check whether the source can be backed up. + + For RBD sources this means the volume exists and is accessible. + + """ + try: + if self.rbd.exists(self._image_name): + return True + except Exception: + self.log.exception("not-ready") + return False + + def __call__(self, revision): + self.revision = revision + return self + + def __enter__(self): + snapname = "backy-{}".format(self.revision.uuid) + self.create_snapshot(snapname) + return self + + def create_snapshot(self, snapname: str) -> None: + """An overridable method to allow different ways of creating the + snapshot. 
+ """ + self.rbd.snap_create(self._image_name + "@" + snapname) + + @property + def _image_name(self) -> str: + return "{}/{}".format(self.pool, self.image) + + def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): + self._delete_old_snapshots() + + def get_parent(self) -> Optional[Revision]: + if self.always_full: + self.log.info("backup-always-full") + return None + revision = self.revision + while True: + parent = revision.get_parent() + if not parent: + self.log.info("backup-no-valid-parent") + return None + if not self.rbd.exists(self._image_name + "@backy-" + parent.uuid): + self.log.info( + "ignoring-rev-without-snapshot", + revision_uuid=parent.uuid, + ) + revision = parent + continue + # Ok, it's trusted and we have a snapshot. Let's do a diff. + return parent + + def diff(self, target: "RbdSource", parent: Revision) -> None: + self.log.info("diff") + snap_from = "backy-" + parent.uuid + snap_to = "backy-" + self.revision.uuid + s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) + with s as source, target.open(self.revision, parent) as target_: + bytes = source.integrate(target_, snap_from, snap_to) + self.log.info("diff-integration-finished") + + self.revision.stats["bytes_written"] = bytes + + # TMP Gather statistics to see where to optimize + from backy.rbd.chunked.chunk import chunk_stats + + self.revision.stats["chunk_stats"] = chunk_stats + + def full(self, target: File) -> None: + self.log.info("full") + s = self.rbd.export( + "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) + ) + copied = 0 + with s as source, target as target_: + while True: + buf = source.read(4 * backy.utils.MiB) + if not buf: + break + target_.write(buf) + copied += len(buf) + self.revision.stats["bytes_written"] = copied + + # TMP Gather statistics to see if we actually are aligned. + from backy.rbd.chunked.chunk import chunk_stats + + self.revision.stats["chunk_stats"] = chunk_stats + + def verify(self, target: File, quarantine: QuarantineStore) -> bool: + s = self.rbd.image_reader( + "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) + ) + self.revision.stats["ceph-verification"] = "partial" + + with s as source, target as target_: + self.log.info("verify") + return backy.utils.files_are_roughly_equal( + source, + target_, + report=lambda s, t, o: quarantine.add_report( + QuarantineReport(s, t, o) + ), + ) + + def _delete_old_snapshots(self) -> None: + # Clean up all snapshots except the one for the most recent valid + # revision. + # Previously we used to remove all snapshots but the one for this + # revision - which is wrong: broken new revisions would always cause + # full backups instead of new deltas based on the most recent valid + # one. 
+ # XXX this will break if multiple servers are active + if not self.always_full and self.revision.repository.local_history: + keep_snapshot_revision = self.revision.repository.local_history[ + -1 + ].uuid + else: + keep_snapshot_revision = None + for snapshot in self.rbd.snap_ls(self._image_name): + if not snapshot["name"].startswith("backy-"): + # Do not touch non-backy snapshots + continue + uuid = snapshot["name"].replace("backy-", "") + if uuid != keep_snapshot_revision: + time.sleep(3) # avoid race condition while unmapping + self.log.info( + "delete-old-snapshot", snapshot_name=snapshot["name"] + ) + try: + self.rbd.snap_rm(self._image_name + "@" + snapshot["name"]) + except Exception: + self.log.exception( + "delete-old-snapshot-failed", + snapshot_name=snapshot["name"], + ) + + def create_snapshot(self, name: str) -> None: + consul = consulate.Consul(token=self.consul_acl_token) + snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) + self.log.info( + "creating-snapshot", + snapshot_name=name, + snapshot_key=snapshot_key, + ) + + consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} + + time.sleep(3) + try: + timeout = TimeOut( + self.snapshot_timeout, interval=2, raise_on_timeout=True + ) + while timeout.tick(): + for snapshot in self.rbd.snap_ls(self._image_name): + if snapshot["name"] == name: + return + except TimeOutError: + # The VM might have been shut down. Try doing a regular Ceph + # snapshot locally. + super(FlyingCircusRootDisk, self).create_snapshot(name) + except KeyboardInterrupt: + raise + finally: + # In case the snapshot still gets created: the general snapshot + # deletion code in ceph/source will clean up unused backy snapshots + # anyway. However, we need to work a little harder to delete old + # snapshot requests, otherwise we've sometimes seen those not + # getting deleted and then re-created all the time. + for key in list(consul.kv.find("snapshot/")): + try: + s = consul.kv[key] + except KeyError: + continue + try: + s = json.loads(s) + except json.decoder.JSONDecodeError: + # Clean up garbage. + self.log.warning( + "create-snapshot-removing-garbage-request", + snapshot_key=key, + ) + del consul.kv[key] + if s["vm"] != self.vm: + continue + # The knowledge about the `backy-` prefix isn't properly + # encapsulated here. 
+ if s["snapshot"].startswith("backy-"): + self.log.info( + "create-snapshot-removing-request", + vm=s["vm"], + snapshot_name=s["snapshot"], + snapshot_key=key, + ) + del consul.kv[key] diff --git a/src/backy/rbd/sources/ceph/__init__.py b/src/backy/rbd/sources/ceph/__init__.py deleted file mode 100644 index 623db0e2..00000000 --- a/src/backy/rbd/sources/ceph/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from subprocess import PIPE, run - - -def detect_whole_object_support(): - result = run( - ["rbd", "help", "export-diff"], stdout=PIPE, stderr=PIPE, check=True - ) - return "--whole-object" in result.stdout.decode("ascii") - - -try: - CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = detect_whole_object_support() -except Exception: - CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = False diff --git a/src/backy/rbd/sources/ceph/diff.py b/src/backy/rbd/sources/ceph/diff.py deleted file mode 100644 index 487b9e18..00000000 --- a/src/backy/rbd/sources/ceph/diff.py +++ /dev/null @@ -1,157 +0,0 @@ -import struct -from collections import namedtuple -from typing import IO, Optional - -from backy.utils import punch_hole - - -def unpack_from(fmt, f): - size = struct.calcsize(fmt) - b = f.read(size) - return struct.unpack(fmt, b) - - -Zero = namedtuple("Zero", ["start", "length"]) -Data = namedtuple("Data", ["start", "length", "stream"]) -SnapSize = namedtuple("SnapSize", ["size"]) -FromSnap = namedtuple("FromSnap", ["snapshot"]) -ToSnap = namedtuple("ToSnap", ["snapshot"]) - - -class RBDDiffV1(object): - f: IO - phase: str # header, metadata, data - record_type: Optional[str] - _streaming: bool - - header = b"rbd diff v1\n" - - def __init__(self, fh): - # self.filename = filename - self.f = fh - - self.phase = "header" - self.read_header() - self.record_type = None - self._streaming = False - - def read_header(self): - assert self.phase == "header" - header = self.f.read(len(self.header)) - if header != self.header: - raise ValueError("Unexpected header: {0!r}".format(header)) - self.phase = "metadata" - - def read_record(self): - if self.phase == "end": - return - assert not self._streaming, "Unread data from read_w. Consume first." - last_record_type = self.record_type - self.record_type = self.f.read(1).decode("ascii") - if self.record_type not in ["f", "t", "s", "w", "z", "e"]: - raise ValueError( - 'Got invalid record type "{}". Previous record: {}'.format( - self.record_type, last_record_type - ) - ) - method = getattr(self, "read_{}".format(self.record_type)) - return method() - - def read_fbytes(self, encoding=None): - length = unpack_from(" str: - # This wrapper function for the `rbd` command is only used for - # getting and interpreting text messages, making this the correct level for - # managing text encoding. - # Other use cases where binary data is piped to rbd have their own dedicated - # wrappers. 
- return subprocess.check_output( - cmdline, encoding=encoding, errors="replace" - ) - - def _rbd(self, cmd, format=None): - cmd = filter(None, cmd) - rbd = [RBD] - - rbd.extend(cmd) - - if format == "json": - rbd.append("--format=json") - - self.log.debug("executing-command", command=" ".join(rbd)) - result = self._ceph_cli(rbd) - - self.log.debug("executed-command", stdout=result) - if format == "json": - result = json.loads(result) - - return result - - def exists(self, snapspec: str): - try: - return self._rbd(["info", snapspec], format="json") - except subprocess.CalledProcessError as e: - if e.returncode == 2: - return False - raise - - def map(self, image: str, readonly=False): - def parse_mappings_pre_nautilus(mappings): - """The parser code for Ceph release Luminous and earlier.""" - for mapping in mappings.values(): - if image == "{pool}/{name}@{snap}".format(**mapping): - return mapping - raise RuntimeError("Map not found in mapping list.") - - def parse_mappings_since_nautilus(mappings): - """The parser code for Ceph release Nautilus and later.""" - for mapping in mappings: - if image == "{pool}/{name}@{snap}".format(**mapping): - return mapping - raise RuntimeError("Map not found in mapping list.") - - versionstring = self._rbd(["--version"]) - - self._rbd(["map", image, "--read-only" if readonly else ""]) - - mappings_raw = self._rbd(["showmapped"], format="json") - - if "nautilus" in versionstring: - mapping = parse_mappings_since_nautilus(mappings_raw) - elif "luminous" in versionstring: - mapping = parse_mappings_pre_nautilus(mappings_raw) - else: - # our jewel build provides no version info - # this will break with releases newer than nautilus - mapping = parse_mappings_pre_nautilus(mappings_raw) - - def scrub_mapping(mapping): - SPEC = {"pool", "name", "snap", "device"} - # Ensure all specced keys exist - for key in SPEC: - if key not in mapping: - raise KeyError( - f"Missing key `{key}` in mapping {mapping!r}" - ) - # Scrub all non-specced keys - for key in list(mapping): - if key not in SPEC: - del mapping[key] - return mapping - - return scrub_mapping(mapping) - - def unmap(self, device): - self._rbd(["unmap", device]) - - def snap_create(self, image): - self._rbd(["snap", "create", image]) - - def snap_ls(self, image): - return self._rbd(["snap", "ls", image], format="json") - - def snap_rm(self, image): - return self._rbd(["snap", "rm", image]) - - @contextlib.contextmanager - def export_diff(self, new: str, old: str) -> Iterator[RBDDiffV1]: - self.log.info("export-diff") - if backy.rbd.sources.ceph.CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF: - EXPORT_WHOLE_OBJECT = ["--whole-object"] - else: - EXPORT_WHOLE_OBJECT = [] - proc = subprocess.Popen( - [RBD, "export-diff", new, "--from-snap", old] - + EXPORT_WHOLE_OBJECT - + ["-"], - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - # Have a rather largish buffer size, so rbd has some room to - # push its data to, when we are busy writing. 
- bufsize=8 * CHUNK_SIZE, - ) - assert proc.stdout is not None - try: - yield RBDDiffV1(proc.stdout) - finally: - proc.stdout.close() - proc.wait() - - @contextlib.contextmanager - def image_reader(self, image: str) -> Iterator[BinaryIO]: - mapped = self.map(image, readonly=True) - source = open(mapped["device"], "rb", buffering=CHUNK_SIZE) - try: - yield source - finally: - source.close() - self.unmap(mapped["device"]) - - @contextlib.contextmanager - def export(self, image: str) -> Iterator[IO]: - self.log.info("export") - proc = subprocess.Popen( - [RBD, "export", image, "-"], - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - # Have a rather largish buffer size, so rbd has some room to - # push its data to, when we are busy writing. - bufsize=4 * CHUNK_SIZE, - ) - assert proc.stdout is not None - try: - yield proc.stdout - finally: - proc.stdout.close() - proc.wait() diff --git a/src/backy/rbd/sources/ceph/source.py b/src/backy/rbd/sources/ceph/source.py deleted file mode 100644 index 4cdca55b..00000000 --- a/src/backy/rbd/sources/ceph/source.py +++ /dev/null @@ -1,172 +0,0 @@ -import time -from typing import Optional - -from structlog.stdlib import BoundLogger - -import backy.utils -from backy.revision import Revision - -from ...chunked.file import File -from ...quarantine import QuarantineReport, QuarantineStore -from .rbd import RBDClient - - -class CephRBD: - """The Ceph RBD source. - - Manages snapshots corresponding to revisions and provides a verification - that tries to balance reliability and performance. - """ - - pool: str - image: str - always_full: bool - log: BoundLogger - rbd: RBDClient - revision: Revision - - def __init__(self, config: dict, log: BoundLogger): - self.pool = config["pool"] - self.image = config["image"] - self.always_full = config.get("full-always", False) - self.log = log.bind(subsystem="ceph") - self.rbd = RBDClient(self.log) - - def ready(self) -> bool: - """Check whether the source can be backed up. - - For RBD sources this means the volume exists and is accessible. - - """ - try: - if self.rbd.exists(self._image_name): - return True - except Exception: - self.log.exception("not-ready") - return False - - def __call__(self, revision): - self.revision = revision - return self - - def __enter__(self): - snapname = "backy-{}".format(self.revision.uuid) - self.create_snapshot(snapname) - return self - - def create_snapshot(self, snapname: str) -> None: - """An overridable method to allow different ways of creating the - snapshot. - """ - self.rbd.snap_create(self._image_name + "@" + snapname) - - @property - def _image_name(self) -> str: - return "{}/{}".format(self.pool, self.image) - - def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): - self._delete_old_snapshots() - - def get_parent(self) -> Optional[Revision]: - if self.always_full: - self.log.info("backup-always-full") - return None - revision = self.revision - while True: - parent = revision.get_parent() - if not parent: - self.log.info("backup-no-valid-parent") - return None - if not self.rbd.exists(self._image_name + "@backy-" + parent.uuid): - self.log.info( - "ignoring-rev-without-snapshot", - revision_uuid=parent.uuid, - ) - revision = parent - continue - # Ok, it's trusted and we have a snapshot. Let's do a diff. 
- return parent - - def diff(self, target: "RbdSource", parent: Revision) -> None: - self.log.info("diff") - snap_from = "backy-" + parent.uuid - snap_to = "backy-" + self.revision.uuid - s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) - with s as source, target.open(self.revision, parent) as target_: - bytes = source.integrate(target_, snap_from, snap_to) - self.log.info("diff-integration-finished") - - self.revision.stats["bytes_written"] = bytes - - # TMP Gather statistics to see where to optimize - from backy.rbd.chunked.chunk import chunk_stats - - self.revision.stats["chunk_stats"] = chunk_stats - - def full(self, target: File) -> None: - self.log.info("full") - s = self.rbd.export( - "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) - ) - copied = 0 - with s as source, target as target_: - while True: - buf = source.read(4 * backy.utils.MiB) - if not buf: - break - target_.write(buf) - copied += len(buf) - self.revision.stats["bytes_written"] = copied - - # TMP Gather statistics to see if we actually are aligned. - from backy.rbd.chunked.chunk import chunk_stats - - self.revision.stats["chunk_stats"] = chunk_stats - - def verify(self, target: File, quarantine: QuarantineStore) -> bool: - s = self.rbd.image_reader( - "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) - ) - self.revision.stats["ceph-verification"] = "partial" - - with s as source, target as target_: - self.log.info("verify") - return backy.utils.files_are_roughly_equal( - source, - target_, - report=lambda s, t, o: quarantine.add_report( - QuarantineReport(s, t, o) - ), - ) - - def _delete_old_snapshots(self) -> None: - # Clean up all snapshots except the one for the most recent valid - # revision. - # Previously we used to remove all snapshots but the one for this - # revision - which is wrong: broken new revisions would always cause - # full backups instead of new deltas based on the most recent valid - # one. - # XXX this will break if multiple servers are active - if not self.always_full and self.revision.repository.local_history: - keep_snapshot_revision = self.revision.repository.local_history[ - -1 - ].uuid - else: - keep_snapshot_revision = None - for snapshot in self.rbd.snap_ls(self._image_name): - if not snapshot["name"].startswith("backy-"): - # Do not touch non-backy snapshots - continue - uuid = snapshot["name"].replace("backy-", "") - if uuid != keep_snapshot_revision: - time.sleep(3) # avoid race condition while unmapping - self.log.info( - "delete-old-snapshot", snapshot_name=snapshot["name"] - ) - try: - self.rbd.snap_rm(self._image_name + "@" + snapshot["name"]) - except Exception: - self.log.exception( - "delete-old-snapshot-failed", - snapshot_name=snapshot["name"], - ) diff --git a/src/backy/rbd/sources/ceph/tests/__init__.py b/src/backy/rbd/sources/ceph/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/backy/rbd/sources/flyingcircus/__init__.py b/src/backy/rbd/sources/flyingcircus/__init__.py deleted file mode 100644 index 91b1aa49..00000000 --- a/src/backy/rbd/sources/flyingcircus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Make this a package. 
diff --git a/src/backy/rbd/sources/flyingcircus/source.py b/src/backy/rbd/sources/flyingcircus/source.py deleted file mode 100644 index 5dc681f3..00000000 --- a/src/backy/rbd/sources/flyingcircus/source.py +++ /dev/null @@ -1,80 +0,0 @@ -import json -import time -import uuid - -import consulate -from structlog.stdlib import BoundLogger - -from backy.utils import TimeOut, TimeOutError - -from ... import RbdSource -from ..ceph.source import CephRBD - - -class FlyingCircusRootDisk(CephRBD): - snapshot_timeout = 90 - - def __init__(self, config, log: BoundLogger): - super(FlyingCircusRootDisk, self).__init__(config, log) - self.vm = config["vm"] - self.consul_acl_token = config.get("consul_acl_token") - self.log = self.log.bind(vm=self.vm, subsystem="fc-disk") - - def create_snapshot(self, name: str) -> None: - consul = consulate.Consul(token=self.consul_acl_token) - snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) - self.log.info( - "creating-snapshot", - snapshot_name=name, - snapshot_key=snapshot_key, - ) - - consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} - - time.sleep(3) - try: - timeout = TimeOut( - self.snapshot_timeout, interval=2, raise_on_timeout=True - ) - while timeout.tick(): - for snapshot in self.rbd.snap_ls(self._image_name): - if snapshot["name"] == name: - return - except TimeOutError: - # The VM might have been shut down. Try doing a regular Ceph - # snapshot locally. - super(FlyingCircusRootDisk, self).create_snapshot(name) - except KeyboardInterrupt: - raise - finally: - # In case the snapshot still gets created: the general snapshot - # deletion code in ceph/source will clean up unused backy snapshots - # anyway. However, we need to work a little harder to delete old - # snapshot requests, otherwise we've sometimes seen those not - # getting deleted and then re-created all the time. - for key in list(consul.kv.find("snapshot/")): - try: - s = consul.kv[key] - except KeyError: - continue - try: - s = json.loads(s) - except json.decoder.JSONDecodeError: - # Clean up garbage. - self.log.warning( - "create-snapshot-removing-garbage-request", - snapshot_key=key, - ) - del consul.kv[key] - if s["vm"] != self.vm: - continue - # The knowledge about the `backy-` prefix isn't properly - # encapsulated here. 
- if s["snapshot"].startswith("backy-"): - self.log.info( - "create-snapshot-removing-request", - vm=s["vm"], - snapshot_name=s["snapshot"], - snapshot_key=key, - ) - del consul.kv[key] diff --git a/src/backy/rbd/sources/ceph/tests/conftest.py b/src/backy/rbd/tests/conftest.py similarity index 100% rename from src/backy/rbd/sources/ceph/tests/conftest.py rename to src/backy/rbd/tests/conftest.py diff --git a/src/backy/rbd/sources/ceph/tests/nodata.rbddiff b/src/backy/rbd/tests/nodata.rbddiff similarity index 100% rename from src/backy/rbd/sources/ceph/tests/nodata.rbddiff rename to src/backy/rbd/tests/nodata.rbddiff diff --git a/src/backy/rbd/sources/ceph/tests/test_ceph_source.py b/src/backy/rbd/tests/test_ceph_source.py similarity index 100% rename from src/backy/rbd/sources/ceph/tests/test_ceph_source.py rename to src/backy/rbd/tests/test_ceph_source.py diff --git a/src/backy/rbd/sources/ceph/tests/test_diff.py b/src/backy/rbd/tests/test_diff.py similarity index 100% rename from src/backy/rbd/sources/ceph/tests/test_diff.py rename to src/backy/rbd/tests/test_diff.py diff --git a/src/backy/rbd/sources/ceph/tests/test_fc_source.py b/src/backy/rbd/tests/test_fc_source.py similarity index 100% rename from src/backy/rbd/sources/ceph/tests/test_fc_source.py rename to src/backy/rbd/tests/test_fc_source.py diff --git a/src/backy/rbd/sources/ceph/tests/test_rbd.py b/src/backy/rbd/tests/test_rbd.py similarity index 100% rename from src/backy/rbd/sources/ceph/tests/test_rbd.py rename to src/backy/rbd/tests/test_rbd.py diff --git a/src/backy/repository.py b/src/backy/repository.py index 2d3cb0c6..ee2ae773 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -23,6 +23,19 @@ from .revision import Revision, Trust, filter_schedule_tags from .schedule import Schedule +# Locking strategy: +# +# - You can only run one backup of a machine at a time, as the backup will +# interact with this machines' list of snapshots and will get confused +# if run in parallel. +# - You can restore while a backup is running. +# - You can only purge while nothing else is happening. +# - Trying to get a shared lock (specifically purge) will block and wait +# whereas trying to get an exclusive lock (running backups, purging) will +# immediately give up. +# - Locking is not re-entrant. It's forbidden and protected to call another +# locking main function. + class RepositoryNotEmpty(RuntimeError): pass diff --git a/src/backy/source.py b/src/backy/source.py index 9c45f3dd..0712d6de 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from importlib.metadata import entry_points -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Generic, TypeVar from structlog.stdlib import BoundLogger @@ -15,7 +15,10 @@ def factory_by_type(type_) -> type["Source"]: return SOURCE_PLUGINS[type_].load() -class Source(ABC): +RestoreArgsType = TypeVar("RestoreArgsType") + + +class Source(Generic[RestoreArgsType]): """A source provides specific implementations for making and restoring backups. @@ -66,5 +69,5 @@ def backup(self, revision: "Revision") -> "Source": ... @abstractmethod - def restore(self, revision: "Revision", *args, **kw) -> "Source": + def restore(self, revision: "Revision", args: RestoreArgsType): ... 
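
With the ceph/flyingcircus subpackages collapsed into `source.py` and `rbd.py`, the remaining API question is how `restore()` stays typed per source. This patch answers it with a `Generic[RestoreArgsType]` parameter: each source declares a small argument container and parameterizes `Source` with it, replacing the previous `*args, **kw` signature. A minimal sketch of the pattern, where `EchoSource` and `EchoRestoreArgs` are hypothetical names used only for illustration:

    from dataclasses import dataclass
    from typing import Any

    from structlog.stdlib import BoundLogger

    from backy.revision import Revision
    from backy.source import Source


    @dataclass(frozen=True)
    class EchoRestoreArgs:
        target: str


    class EchoSource(Source[EchoRestoreArgs]):
        type_ = "echo"

        @classmethod
        def from_config(
            cls, config: dict[str, Any], log: BoundLogger
        ) -> "EchoSource":
            return cls()

        def restore(self, revision: Revision, args: EchoRestoreArgs) -> None:
            # Call sites are now checkable by mypy:
            #     source.restore(rev, EchoRestoreArgs(target="-"))
            # instead of passing an opaque *args/**kw bag.
            print(revision.uuid, "->", args.target)

The RBD source follows the same shape with its own `RestoreArgs` (restore target plus restore backend), which the `backy-rbd` entry point builds from argparse values in the next patch.
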
From ac3050717a551eae0de1cf4ff553f25dd70e2a99 Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Tue, 23 Jul 2024 16:39:59 +0200 Subject: [PATCH 20/25] snapshot: backy-rbd refactor --- input-file | 1 - restore.img | 1 - src/backy/cli/__init__.py | 24 +- src/backy/cli/tests/test_client.py | 4 +- src/backy/cli/tests/test_main.py | 12 +- src/backy/daemon/scheduler.py | 6 +- src/backy/daemon/tests/test_daemon.py | 9 +- src/backy/file/__init__.py | 20 +- src/backy/file/tests/test_file.py | 33 +- src/backy/rbd/__init__.py | 23 +- src/backy/rbd/chunked/__init__.py | 25 +- src/backy/rbd/chunked/chunk.py | 18 +- src/backy/rbd/chunked/file.py | 56 +-- src/backy/rbd/chunked/tests/test_backend.py | 47 --- src/backy/rbd/chunked/tests/test_chunk.py | 2 - src/backy/rbd/chunked/tests/test_file.py | 3 + src/backy/rbd/conftest.py | 32 -- src/backy/rbd/quarantine.py | 108 ----- src/backy/rbd/rbd.py | 16 +- src/backy/rbd/source.py | 324 ++++++++------- src/backy/rbd/tests/conftest.py | 27 +- src/backy/rbd/tests/test_backup.py | 73 ---- src/backy/rbd/tests/test_backy-rbd.py | 179 -------- src/backy/rbd/tests/test_ceph.py | 384 ++++++++++++++++++ src/backy/rbd/tests/test_ceph_source.py | 346 ---------------- src/backy/rbd/tests/test_diff.py | 8 +- src/backy/rbd/tests/test_fc_source.py | 88 ---- src/backy/rbd/tests/test_main.py | 154 +++++-- src/backy/rbd/tests/test_rbd.py | 6 +- src/backy/rbd/tests/test_source.py | 321 ++++++++++++++- src/backy/report.py | 95 +++++ src/backy/repository.py | 199 +++++---- src/backy/source.py | 33 +- .../test_report.py} | 11 +- src/backy/tests/test_repository.py | 1 + src/backy/utils.py | 21 +- 36 files changed, 1384 insertions(+), 1326 deletions(-) delete mode 100644 input-file delete mode 100644 restore.img delete mode 100644 src/backy/rbd/chunked/tests/test_backend.py delete mode 100644 src/backy/rbd/conftest.py delete mode 100644 src/backy/rbd/quarantine.py delete mode 100644 src/backy/rbd/tests/test_backup.py delete mode 100644 src/backy/rbd/tests/test_backy-rbd.py create mode 100644 src/backy/rbd/tests/test_ceph.py delete mode 100644 src/backy/rbd/tests/test_ceph_source.py delete mode 100644 src/backy/rbd/tests/test_fc_source.py create mode 100644 src/backy/report.py rename src/backy/{rbd/tests/test_quarantine.py => tests/test_report.py} (77%) create mode 100644 src/backy/tests/test_repository.py diff --git a/input-file b/input-file deleted file mode 100644 index 1b67461d..00000000 --- a/input-file +++ /dev/null @@ -1 +0,0 @@ -volume contents diff --git a/restore.img b/restore.img deleted file mode 100644 index 1b67461d..00000000 --- a/restore.img +++ /dev/null @@ -1 +0,0 @@ -volume contents diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index 53d0e616..bf0aabe5 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -1,7 +1,5 @@ import argparse import asyncio -import errno -import os import re import subprocess import sys @@ -23,11 +21,12 @@ from backy import logging from backy.daemon import BackyDaemon from backy.daemon.api import Client +from backy.rbd import RestoreBackend # XXX invert this dependency -from backy.rbd.rbdsource import RestoreBackend from backy.repository import Repository from backy.revision import Revision +from backy.schedule import Schedule from backy.utils import format_datetime_local, generate_taskid # single repo commands @@ -39,11 +38,11 @@ # log [--filter] (status) Show backup status. 
Show inventory and summary information -# backup [--fg] Perform a backup -# restore Restore (a given revision) to a given target +# backup [--fg] (remote) Perform a backup +# restore (remote) Restore (a given revision) to a given target # distrust Distrust specified revisions -# verify Verify specified revisions +# verify (remote) Verify specified revisions # rm Forget specified revision # tag Modify tags on revision @@ -100,16 +99,21 @@ def create_api_client(self): ) def init(self, type): - source = backy.source.factory_by_type(type) - Repository.init(self.path, self.log, source=source) + sourcefactory = backy.source.factory_by_type(type) + source = sourcefactory(*sourcefactory.argparse()) + # TODO: check if repo already exists + repo = Repository(self.path / "config", source, Schedule(), self.log) + repo.connect() + repo.store() def rev_parse(self, revision: str, uuid: bool) -> None: - b = Repository(self.path, self.log) + b = Repository.load(self.path, self.log) + b.connect() for r in b.find_revisions(revision): if uuid: print(r.uuid) else: - print(r.filename) + print(r.info_filename) def log_(self, yaml_: bool, revision: str) -> None: revs = Repository(self.path, self.log).find_revisions(revision) diff --git a/src/backy/cli/tests/test_client.py b/src/backy/cli/tests/test_client.py index bd354541..b2db886f 100644 --- a/src/backy/cli/tests/test_client.py +++ b/src/backy/cli/tests/test_client.py @@ -7,7 +7,7 @@ from backy import utils from backy.daemon.api import BackyAPI, Client -from backy.rbd.quarantine import QuarantineReport +from backy.report import ChunkMismatchReport from backy.revision import Revision from backy.tests import Ellipsis @@ -321,7 +321,7 @@ async def test_cli_check_manual_tags(daemon, cli_client, log): async def test_cli_check_quarantine(daemon, cli_client, log): job = daemon.jobs["test01"] - job.repository.quarantine.add_report(QuarantineReport(b"a", b"b", 0)) + job.repository.quarantine.add_report(ChunkMismatchReport(b"a", b"b", 0)) utils.log_data = "" try: diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index cf25061a..90598a22 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -154,9 +154,9 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): os.makedirs(tmp_path / "backy") os.chdir(tmp_path / "backy") - with open(tmp_path / "backy" / "config", "wb") as f: + with open(tmp_path / "backy" / "config", "w", encoding="utf-8") as f: f.write( - """ + f""" --- schedule: daily: @@ -164,12 +164,8 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): keep: 7 source: type: file - filename: {} -""".format( - __file__ - ).encode( - "utf-8" - ) + filename: {__file__} +""" ) monkeypatch.setattr( diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index 0d0bb996..fd717415 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -463,7 +463,8 @@ def stop(self) -> None: self.update_status("") async def push_metadata(self) -> int: - return await self.repository.run_with_backup_lock(self._push_metadata) + with self.repository.locked(target=".backup", mode="exclusive"): + return await self._push_metadata() async def _push_metadata(self) -> int: grouped = defaultdict(list) @@ -535,7 +536,8 @@ async def _push_metadata_single( return error async def pull_metadata(self) -> int: - return await self.repository.run_with_backup_lock(self._pull_metadata) + with self.repository.locked(target=".backup", mode="exclusive"): + return await 
self._pull_metadata() async def _pull_metadata(self) -> int: async def remove_dead_peer(): diff --git a/src/backy/daemon/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py index 1ca0d7e0..0b6ded26 100644 --- a/src/backy/daemon/tests/test_daemon.py +++ b/src/backy/daemon/tests/test_daemon.py @@ -13,7 +13,6 @@ from backy import utils from backy.daemon import BackyDaemon from backy.daemon.scheduler import Job -from backy.rbd.chunked import ChunkedFileBackend from backy.revision import Revision from backy.tests import Ellipsis @@ -146,7 +145,7 @@ async def send_sighup(): assert signal_task not in all_tasks -async def test_run_backup(daemon, log): +async def test_run_backup(daemon, rbdrepository, log): job = daemon.jobs["test01"] await job.run_backup({"manual:asdf"}) @@ -154,8 +153,7 @@ async def test_run_backup(daemon, log): assert len(job.repository.history) == 1 revision = job.repository.history[0] assert revision.tags == {"manual:asdf"} - backend = ChunkedFileBackend(revision, log) - with backend.open("r") as f: + with rbdrepository.open(revision) as f: assert f.read() == b"I am your father, Luke!" # Run again. This also covers the code path that works if @@ -165,8 +163,7 @@ async def test_run_backup(daemon, log): assert len(job.repository.history) == 2 revision = job.repository.history[1] assert revision.tags == {"manual:asdf"} - backend = ChunkedFileBackend(revision, log) - with backend.open("r") as f: + with rbdrepository.open(revision) as f: assert f.read() == b"I am your father, Luke!" diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index 0261a318..d6dd37d4 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -17,22 +17,25 @@ class FileSource(Source): - path: Path # the source we are backing up type_ = "file" subcommand = "backy-file" - def __init__(self, path: Path): + repository: Repository + path: Path # the source we are backing up + + def __init__(self, repository: Repository, path: Path): + self.repository = repository self.path = path @classmethod def from_config( - cls, config: dict[str, Any], log: BoundLogger + cls, repository: Repository, config: dict[str, Any], log: BoundLogger ) -> "FileSource": - return cls(path=Path(config["path"])) + assert cls.type_ == config["type"] + return cls(repository, Path(config["path"])) - @staticmethod - def to_config(path: Path) -> dict[str, Any]: - return {"path": str(path)} + def to_config(self) -> dict[str, Any]: + return {"type": self.type_, "path": str(self.path)} def _path_for_revision(self, revision: Revision) -> Path: return self.repository.path / revision.uuid @@ -148,7 +151,8 @@ def main(): log.debug("invoked", args=" ".join(sys.argv)) try: - b = FileSource(args.backupdir, log) + b = FileSource() + Repository.f # XXX scheduler? 
b._clean() ret = 0 diff --git a/src/backy/file/tests/test_file.py b/src/backy/file/tests/test_file.py index 19d41cb1..f8d2d352 100644 --- a/src/backy/file/tests/test_file.py +++ b/src/backy/file/tests/test_file.py @@ -1,5 +1,3 @@ -from typing import cast - import yaml from backy.file import FileSource @@ -11,14 +9,12 @@ def test_bootstrap_from_api(tmp_path, log): original = tmp_path / "original.txt" - source = FileSource(original) - schedule = Schedule() + repository = Repository(tmp_path / "repository", FileSource, schedule, log) + repository.connect() + source = FileSource(repository, original) - repo_path = tmp_path / "repository" - repository = Repository(repo_path, source, schedule, log) - - exercise_fresh_repo(repository) + exercise_fresh_repo(source) def test_bootstrap_from_config(tmp_path, log): @@ -26,28 +22,29 @@ def test_bootstrap_from_config(tmp_path, log): repo_path = tmp_path / "repository" - config = { + repo_conf = { "path": repo_path, "schedule": {}, - "source": {"type": "file", "path": str(original)}, + "type": "file", } + source_conf = {"type": "file", "path": str(original)} - repository = Repository.from_config(config, log) - - exercise_fresh_repo(repository) + repository = Repository.from_config(repo_conf, log) + repository.connect() + source = FileSource.from_config(repository, source_conf, log) + exercise_fresh_repo(source) -def exercise_fresh_repo(repository: Repository): - source = cast(FileSource, repository.source) +def exercise_fresh_repo(source: FileSource): original = source.path - repository.connect() - with open(original, "w") as f: f.write("This is the original file.") - revision = Revision.create(repository, {"test"}, repository.log) + revision = Revision.create( + source.repository, {"test"}, source.repository.log + ) source.backup(revision) with open(original, "w") as f: diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index 483aa5d0..4214f446 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -8,7 +8,8 @@ from backy.utils import generate_taskid from .. import logging -from .rbdsource import RbdSource, RestoreBackend +from ..repository import Repository +from .source import RBDSource, RestoreArgs, RestoreBackend def main(): @@ -97,19 +98,29 @@ def main(): log = structlog.stdlib.get_logger(subsystem="command") log.debug("invoked", args=" ".join(sys.argv)) + # TODO add init here? 
+ try: - b = RbdSource(args.backupdir, log) + repo = Repository.load(args.backupdir, log) + repo.connect() + source = RBDSource.from_repo(repo) + ret = 0 match args.func: case "backup": - success = b.backup(args.revision) + success = source.backup(repo.find_by_uuid(args.revision)) ret = int(not success) case "restore": - b.restore(args.revision, args.target, args.restore_backend) + source.restore( + repo.find_by_uuid(args.revision), + RestoreArgs( + target=args.target, backend=args.restore_backend + ), + ) case "gc": - b.gc() + source.gc() case "verify": - b.verify(args.revision) + source.verify(repo.find_by_uuid(args.revision)) case _: raise ValueError("invalid function: " + args.fun) log.debug("return-code", code=ret) diff --git a/src/backy/rbd/chunked/__init__.py b/src/backy/rbd/chunked/__init__.py index 5ad5a0fc..2bd87626 100644 --- a/src/backy/rbd/chunked/__init__.py +++ b/src/backy/rbd/chunked/__init__.py @@ -1,5 +1,18 @@ from typing import TypeAlias +Hash: TypeAlias = str + + +class BackendException(IOError): + pass + + +class InconsistentHash(BackendException): + def __init__(self, expected, actual): + self.expected = expected + self.actual = actual + + from .chunk import Chunk from .file import File from .store import Store @@ -12,15 +25,3 @@ "BackendException", "InconsistentHash", ] - -Hash: TypeAlias = str - - -class BackendException(IOError): - pass - - -class InconsistentHash(BackendException): - def __init__(self, expected, actual): - self.expected = expected - self.actual = actual diff --git a/src/backy/rbd/chunked/chunk.py b/src/backy/rbd/chunked/chunk.py index b289513b..04b9aa82 100644 --- a/src/backy/rbd/chunked/chunk.py +++ b/src/backy/rbd/chunked/chunk.py @@ -15,12 +15,6 @@ from .store import Store -chunk_stats = { - "write_full": 0, - "write_partial": 0, -} - - class Chunk(object): """A chunk in a file that represents a part of it. @@ -35,16 +29,16 @@ class Chunk(object): store: "Store" clean: bool data: Optional[io.BytesIO] + stats: dict def __init__( - self, - store: "Store", - hash: Optional[Hash], + self, store: "Store", hash: Optional[Hash], stats: Optional[dict] = None ): self.hash = hash self.store = store self.clean = True self.data = None + self.stats = stats if stats is not None else dict() def _read_existing(self) -> None: if self.data: @@ -100,13 +94,15 @@ def write(self, offset: int, data: bytes) -> Tuple[int, bytes]: if offset == 0 and len(data) == self.CHUNK_SIZE: # Special case: overwrite the entire chunk. 
self._init_data(data) - chunk_stats["write_full"] += 1 + self.stats.setdefault("write_full", 0) + self.stats["write_full"] += 1 else: self._read_existing() assert self.data self.data.seek(offset) self.data.write(data) - chunk_stats["write_partial"] += 1 + self.stats.setdefault("write_partial", 0) + self.stats["write_partial"] += 1 self.clean = False return len(data), remaining_data diff --git a/src/backy/rbd/chunked/file.py b/src/backy/rbd/chunked/file.py index f414dd28..24aa0a07 100644 --- a/src/backy/rbd/chunked/file.py +++ b/src/backy/rbd/chunked/file.py @@ -34,10 +34,9 @@ class File(object): name: str store: "Store" + stats: dict closed: bool - overlay: bool size: int - mode: str _position: int _access_stats: dict[int, Tuple[int, float]] # (count, last) @@ -48,30 +47,18 @@ def __init__( self, name: str | os.PathLike, store: "Store", - mode: str = "rw", - overlay: bool = False, + stats: Optional[dict] = None, ): self.name = str(name) self.store = store + self.stats = stats if stats is not None else dict() self.closed = False # This indicates that writes should be temporary and no modify # the metadata when closing. - self.overlay = overlay self._position = 0 self._access_stats = defaultdict(lambda: (0, 0)) - self.mode = mode - - if "+" in self.mode: - self.mode += "w" - if "a" in self.mode: - self.mode += "w" - self.mode = "".join(set(self.mode)) - - if not os.path.exists(name) and "w" not in self.mode: - raise FileNotFoundError("File not found: {}".format(self.name)) - if not os.path.exists(name): self._mapping = {} self.size = 0 @@ -90,9 +77,6 @@ def __init__( self._mapping = {int(k): v for k, v in meta["mapping"].items()} self.size = meta["size"] - if "a" in self.mode: - self._position = self.size - # Chunks that we are working on. self._chunks = {} @@ -120,27 +104,25 @@ def _flush_chunks(self, target: Optional[int] = None) -> None: self._chunks = dict(keep_chunks) def flush(self) -> None: - assert "w" in self.mode and not self.closed + assert not self.closed self._flush_chunks(0) - if not self.overlay: - with open(self.name, "w") as f: - json.dump({"mapping": self._mapping, "size": self.size}, f) - f.flush() - os.fsync(f) + with open(self.name, "w") as f: + json.dump({"mapping": self._mapping, "size": self.size}, f) + f.flush() + os.fsync(f) def close(self) -> None: assert not self.closed - if "w" in self.mode: - self.flush() + self.flush() self.closed = True def isatty(self) -> bool: return False def readable(self) -> bool: - return "r" in self.mode and not self.closed + return not self.closed # def readline(size=-1) # def readlines(hint=-1) @@ -163,9 +145,7 @@ def seek(self, offset: int, whence=io.SEEK_SET) -> int: elif whence == io.SEEK_CUR: position = position + offset else: - raise ValueError( - "`whence` does not support mode {}".format(whence) - ) + raise ValueError("`whence` does not support mode {}".format(whence)) if position < 0: raise ValueError("Can not seek before the beginning of a file.") @@ -192,7 +172,7 @@ def seek(self, offset: int, whence=io.SEEK_SET) -> int: return position def truncate(self, size: Optional[int] = None) -> None: - assert "w" in self.mode and not self.closed + assert not self.closed if size is None: size = self._position # Update content hash @@ -206,7 +186,7 @@ def truncate(self, size: Optional[int] = None) -> None: self.flush() def read(self, size: int = -1) -> bytes: - assert "r" in self.mode and not self.closed + assert not self.closed result = io.BytesIO() max_size = self.size - self._position if size == -1: @@ -225,10 +205,12 @@ def 
read(self, size: int = -1) -> bytes: return result.getvalue() def writable(self) -> bool: - return "w" in self.mode and not self.closed + return not self.closed def write(self, data: bytes) -> None: - assert "w" in self.mode and not self.closed + assert not self.closed + self.stats.setdefault("bytes_written", 0) + self.stats["bytes_written"] += len(data) while data: chunk, _, offset = self._current_chunk() written, data = chunk.write(offset, data) @@ -242,7 +224,9 @@ def _current_chunk(self) -> Tuple[Chunk, int, int]: if chunk_id not in self._chunks: self._flush_chunks() self._chunks[chunk_id] = Chunk( - self.store, self._mapping.get(chunk_id) + self.store, + self._mapping.get(chunk_id), + self.stats.setdefault("chunk_stats", dict()), ) count = self._access_stats[chunk_id][0] self._access_stats[chunk_id] = (count + 1, time.time()) diff --git a/src/backy/rbd/chunked/tests/test_backend.py b/src/backy/rbd/chunked/tests/test_backend.py deleted file mode 100644 index 858fd956..00000000 --- a/src/backy/rbd/chunked/tests/test_backend.py +++ /dev/null @@ -1,47 +0,0 @@ -from backy.rbd.chunked import ChunkedFileBackend -from backy.revision import Revision - - -def test_overlay(rbdrepository, log): - r = Revision.create(rbdrepository, set(), log) - backend = ChunkedFileBackend(r, log) - # Write 1 version to the file - f = backend.open("w") - f.write(b"asdf") - f.close() - with backend.open("r") as f: - assert f.read() == b"asdf" - # Open the file in overlay, write to it - f = backend.open("o") - assert f.read() == b"asdf" - f.seek(0) - f.write(b"bsdf") - f.seek(0) - assert f.read() == b"bsdf" - f.close() - # Close the file and open it again results in the original content - f = backend.open("r") - assert f.read() == b"asdf" - f.close() - - -def test_purge(rbdrepository, log): - r = Revision.create(rbdrepository, set(), log) - backend = ChunkedFileBackend(r, log) - # Write 1 version to the file - f = backend.open("w") - f.write(b"asdf") - f.close() - r.materialize() - remote = Revision(rbdrepository, log) # remote revision without local data - remote.server = "remote" - remote.materialize() - rbdrepository.scan() - # Reassign as the scan will create a new reference - r = rbdrepository.history[0] - assert len(list(backend.store.ls())) == 1 - backend.purge() - assert len(list(backend.store.ls())) == 1 - r.remove() - backend.purge() - assert len(list(backend.store.ls())) == 0 diff --git a/src/backy/rbd/chunked/tests/test_chunk.py b/src/backy/rbd/chunked/tests/test_chunk.py index b8a2578c..6f2c2f0b 100644 --- a/src/backy/rbd/chunked/tests/test_chunk.py +++ b/src/backy/rbd/chunked/tests/test_chunk.py @@ -96,8 +96,6 @@ def test_chunk_fails_wrong_content(tmp_path, log): with open(p, "wb") as existing: existing.write(lzo.compress(b"bsdf")) - f = File(tmp_path / "asdf", store) - chunk = Chunk(store, chunk_hash) with pytest.raises(InconsistentHash): chunk.read(0) diff --git a/src/backy/rbd/chunked/tests/test_file.py b/src/backy/rbd/chunked/tests/test_file.py index 69244bc6..586d9e70 100644 --- a/src/backy/rbd/chunked/tests/test_file.py +++ b/src/backy/rbd/chunked/tests/test_file.py @@ -244,3 +244,6 @@ def test_rplus_and_append_positions(tmp_path, log): with File(tmp_path / "asdf", store) as f: assert f.read() == b"bsdfcsdf" + + +# TODO test bytes_written and chunk_stats diff --git a/src/backy/rbd/conftest.py b/src/backy/rbd/conftest.py deleted file mode 100644 index f248141c..00000000 --- a/src/backy/rbd/conftest.py +++ /dev/null @@ -1,32 +0,0 @@ -import json -import os - -import pytest - -from backy.rbd import 
RbdSource -from backy.revision import Revision - -fixtures = os.path.dirname(__file__) + "/tests/samples" - - -@pytest.fixture -def rbdrepository(schedule, tmp_path, log): - with open(str(tmp_path / "config"), "w", encoding="utf-8") as f: - json.dump( - { - "source": { - "type": "file", - "filename": "input-file", - }, - "schedule": schedule.to_dict(), - }, - f, - ) - return RbdSource(tmp_path, log) - - -def create_rev(rbdrepository, tags): - r = Revision.create(rbdrepository, tags, rbdrepository.log) - r.materialize() - rbdrepository.scan() - return r diff --git a/src/backy/rbd/quarantine.py b/src/backy/rbd/quarantine.py deleted file mode 100644 index adecf63c..00000000 --- a/src/backy/rbd/quarantine.py +++ /dev/null @@ -1,108 +0,0 @@ -import datetime -import hashlib -import traceback -from pathlib import Path -from typing import List - -import shortuuid -import yaml -from structlog.stdlib import BoundLogger -from yaml import SafeDumper - -import backy -from backy.utils import SafeFile - - -class QuarantineReport: - uuid: str - source_chunk: bytes - source_hash: str - target_chunk: bytes - target_hash: str - offset: int - timestamp: datetime.datetime - traceback: str - - def __init__( - self, source_chunk: bytes, target_chunk: bytes, offset: int - ) -> None: - self.uuid = shortuuid.uuid() - self.source_chunk = source_chunk - self.source_hash = hashlib.md5(source_chunk).hexdigest() - self.target_chunk = target_chunk - self.target_hash = hashlib.md5(target_chunk).hexdigest() - self.offset = offset - self.timestamp = backy.utils.now() - self.traceback = "".join(traceback.format_stack()).strip() - - def to_dict(self) -> dict: - return { - "uuid": self.uuid, - "source_hash": self.source_hash, - "target_hash": self.target_hash, - "offset": self.offset, - "timestamp": self.timestamp, - "traceback": self.traceback, - } - - -class QuarantineStore: - path: Path - chunks_path: Path - report_ids: List[str] - log: BoundLogger - - def __init__(self, backup_path: Path, log: BoundLogger) -> None: - self.path = backup_path / "quarantine" - self.path.mkdir(exist_ok=True) - self.chunks_path = self.path / "chunks" - self.chunks_path.mkdir(exist_ok=True) - self.log = log.bind(subsystem="quarantine") - self.scan() - - def add_report(self, report: QuarantineReport) -> None: - self.log.info("add-report", uuid=report.uuid) - self._store_chunk(report.source_chunk, report.source_hash) - self._store_chunk(report.target_chunk, report.target_hash) - self._store_report(report) - - self.report_ids.append(report.uuid) - - def _store_report(self, report: QuarantineReport) -> None: - self.log.debug("store-report", uuid=report.uuid) - path = self.path / f"{report.uuid}.report" - if path.exists(): - self.log.debug("store-report-exists", uuid=report.uuid) - return - - class CustomDumper(SafeDumper): - pass - - def representer(dumper, data): - return dumper.represent_scalar( - "tag:yaml.org,2002:str", - data, - style="|" if len(data) > 100 else None, - ) - - yaml.add_representer(str, representer, Dumper=CustomDumper) - - with SafeFile(path, encoding="utf-8") as f: - f.open_new("wb") - yaml.dump( - report.to_dict(), f, sort_keys=False, Dumper=CustomDumper - ) - - def _store_chunk(self, chunk: bytes, hash: str) -> None: - self.log.debug("store-chunk", hash=hash) - path = self.chunks_path / hash - if path.exists(): - self.log.debug("store-chunk-exists", hash=hash) - return - with SafeFile(path) as f: - f.open_new("wb") - f.write(chunk) - - def scan(self) -> None: - self.report_ids = [g.name for g in self.path.glob("*.report")] - 
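# Note: the QuarantineStore deleted here is superseded by repository-level
# reports. CephRBD.verify() now returns an Optional[ChunkMismatchReport]
# (from the new backy.report module) instead of filing into a quarantine
# store, and RBDSource.backup() records it, as in the source.py hunk below:
#
#     report = source.verify(file)
#     if report:
#         self.repository.add_report(report)
#     verified = not report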
self.log.debug("scan", entries=len(self.report_ids)) diff --git a/src/backy/rbd/rbd.py b/src/backy/rbd/rbd.py index 9b99796e..f53cee1b 100644 --- a/src/backy/rbd/rbd.py +++ b/src/backy/rbd/rbd.py @@ -12,8 +12,11 @@ def detect_whole_object_support(): - result = run( - ["rbd", "help", "export-diff"], stdout=PIPE, stderr=PIPE, check=True + result = subprocess.run( + ["rbd", "help", "export-diff"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, ) return "--whole-object" in result.stdout.decode("ascii") @@ -297,13 +300,8 @@ def read_data(self): return yield record - def integrate(self, target, snapshot_from, snapshot_to, clean=True): - """Integrate this diff into the given target. - - If clean is set (default: True) then remove the delta after a - successful integration. - - """ + def integrate(self, target, snapshot_from, snapshot_to): + """Integrate this diff into the given target.""" bytes = 0 for record in self.read_metadata(): diff --git a/src/backy/rbd/source.py b/src/backy/rbd/source.py index 16440a05..902fe77b 100644 --- a/src/backy/rbd/source.py +++ b/src/backy/rbd/source.py @@ -3,9 +3,10 @@ import subprocess import time import uuid +from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import IO, Any, Optional, Set +from typing import IO, Any, Literal, Optional, Set, cast import consulate from structlog.stdlib import BoundLogger @@ -13,6 +14,7 @@ import backy import backy.utils from backy.ext_deps import BACKY_EXTRACT +from backy.report import ChunkMismatchReport from backy.repository import Repository from backy.revision import Revision, Trust from backy.source import Source @@ -27,10 +29,13 @@ ) from .chunked import BackendException, Chunk, File, Hash, Store -from .quarantine import QuarantineReport, QuarantineStore from .rbd import RBDClient +def locked(target: str, mode: Literal["shared", "exclusive"]): + return Repository.locked(target, mode, repo_attr="repository") + + class RestoreBackend(Enum): AUTO = "auto" PYTHON = "python" @@ -40,34 +45,35 @@ def __str__(self): return self.value +@dataclass(frozen=True) class RestoreArgs: target: str - backend: RestoreBackend + backend: RestoreBackend = RestoreBackend.AUTO -class RbdSource(Source[RestoreArgs]): +class RBDSource(Source[RestoreArgs]): type_ = "rbd" subcommand = "backy-rbd" - source: "FlyingCircusRootDisk" + repository: Repository + ceph_rbd: "CephRBD" store: Store - quarantine: QuarantineStore log: BoundLogger - def __init__(self, rbdsource: "FlyingCircusRootDisk", log: BoundLogger): - self.source = rbdsource + def __init__( + self, repository: Repository, ceph_rbd: "CephRBD", log: BoundLogger + ): self.log = log.bind(subsystem="rbdsource") + self.repository = repository + self.ceph_rbd = ceph_rbd + self.store = Store(repository.path / "chunks", self.log) @classmethod - def from_config(cls, config: dict[str, Any], log: BoundLogger) -> "Source": - assert config.get("backend", "chunked") == "chunked" - return cls(FlyingCircusRootDisk(config, log), log) - - def bind(self, repository: "Repository") -> None: - super().bind(repository) - # TODO: move quarantine to repo - self.quarantine = QuarantineStore(repository.path, self.log) - self.store = Store(repository.path / "chunks", self.log) + def from_config( + cls, repository: Repository, config: dict[str, Any], log: BoundLogger + ) -> "Source": + assert cls.type_ == config["type"] + return cls(repository, CephRBD.from_config(config, log), log) def _path_for_revision(self, revision: Revision) -> Path: return 
self.repository.path / revision.uuid @@ -81,7 +87,9 @@ def open( ) as new, self._path_for_revision(parent).open("rb") as old: # This is ok, this is just metadata, not the actual data. new.write(old.read()) - file = File(self._path_for_revision(revision), self.store) + file = File( + self._path_for_revision(revision), self.store, revision.stats + ) if file.writable() and self.repository.contains_distrusted: # "Force write"-mode if any revision is distrusted. @@ -93,47 +101,50 @@ def open( ################# # Making backups - @Repository.locked(target=".backup", mode="exclusive") - @Repository.locked(target=".purge", mode="shared") + @locked(target=".backup", mode="exclusive") + @locked(target=".purge", mode="shared") def backup(self, revision: Revision) -> bool: self.repository.path.joinpath("last").unlink(missing_ok=True) self.repository.path.joinpath("last.rev").unlink(missing_ok=True) start = time.time() - if not self.source.ready(): + if not self.ceph_rbd.ready(): raise RuntimeError( "Source is not ready (does it exist? can you access it?)" ) - with self.source(revision) as source: - try: + try: + with self.ceph_rbd(revision) as source: parent_rev = source.get_parent() - file = self.open(revision, parent_rev) - if parent_rev: - source.full(file) - else: - source.diff(file, parent_rev) - file = self.open(revision) - verified = source.verify(file, self.quarantine) - except BackendException: - self.log.exception("backend-error-distrust-all") - verified = False - self.repository.distrust("local", skip_lock=True) - if not verified: - self.log.error( - "verification-failed", - revision_uuid=revision.uuid, - ) - revision.remove() - else: - self.log.info("verification-ok", revision_uuid=revision.uuid) - revision.stats["duration"] = time.time() - start - revision.write_info() - revision.readonly() - # Switched from a fine-grained syncing mechanism to "everything - # once" when we're done. This is as safe but much faster. - os.sync() + with self.open(revision, parent_rev) as file: + if parent_rev: + source.diff(file, parent_rev) + else: + source.full(file) + with self.open(revision) as file: + report = source.verify(file) + if report: + self.repository.add_report(report) + verified = not report + except BackendException: + self.log.exception("ceph-error-distrust-all") + verified = False + self.repository.distrust("local", skip_lock=True) + if not verified: + self.log.error( + "verification-failed", + revision_uuid=revision.uuid, + ) + revision.remove() + else: + self.log.info("verification-ok", revision_uuid=revision.uuid) + revision.stats["duration"] = time.time() - start + revision.write_info() + revision.readonly() + # Switched from a fine-grained syncing mechanism to "everything + # once" when we're done. This is as safe but much faster. 
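# The lock stacking above deserves a note. Assuming flock-style semantics
# for Repository.locked (shared holders coexist, an exclusive holder
# excludes everyone), the decorators in this and the following hunks
# compose as:
#
#     backup():   .backup exclusive, .purge shared  -- one backup at a time,
#                                                      concurrent with other
#                                                      .purge shared holders
#     verify() / restore_file() / restore_stdout(): .purge shared
#     gc():       .purge exclusive  -- waits out all shared holders, so
#                                      chunks cannot vanish under an
#                                      in-flight backup or restore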
+ os.sync() # If there are distrusted revisions, then perform at least one # verification after a backup - for good measure and to keep things @@ -150,7 +161,7 @@ def backup(self, revision: Revision) -> bool: break return verified - @Repository.locked(target=".purge", mode="shared") + @locked(target=".purge", mode="shared") @report_status def verify(self, revision: Revision): log = self.log.bind(revision_uuid=revision.uuid) @@ -214,10 +225,12 @@ def verify(self, revision: Revision): yield END yield None - @Repository.locked(target=".purge", mode="exclusive") + @locked(target=".purge", mode="exclusive") def gc(self) -> None: self.log.debug("purge") used_chunks: Set[Hash] = set() + # TODO: also remove mapping file + # TODO: purge quarantine store for revision in self.repository.local_history: used_chunks.update(self.open(revision)._mapping.values()) self.store.purge(used_chunks) @@ -231,7 +244,8 @@ def gc(self) -> None: # restore_stdout and locking isn't re-entrant. def restore(self, revision: Revision, args: RestoreArgs) -> None: s = self.open(revision) - if args.backend == RestoreBackend.AUTO: + restore_backend = args.backend + if restore_backend == RestoreBackend.AUTO: if self.backy_extract_supported(s): restore_backend = RestoreBackend.RUST else: @@ -283,7 +297,7 @@ def restore_backy_extract(self, rev: Revision, target: str) -> None: "Maybe try `--backend python`?" ) - @Repository.locked(target=".purge", mode="shared") + @locked(target=".purge", mode="shared") def restore_file(self, source: IO, target_name: str) -> None: """Bulk-copy from open revision `source` to target file.""" self.log.debug("restore-file", source=source.name, target=target_name) @@ -295,7 +309,7 @@ def restore_file(self, source: IO, target_name: str) -> None: pass copy(source, target) - @Repository.locked(target=".purge", mode="shared") + @locked(target=".purge", mode="shared") def restore_stdout(self, source: IO) -> None: """Emit restore data to stdout (for pipe processing).""" self.log.debug("restore-stdout", source=source.name) @@ -321,19 +335,41 @@ class CephRBD: pool: str image: str always_full: bool - log: BoundLogger + vm: Optional[str] + consul_acl_token: Optional[str] rbd: RBDClient revision: Revision + log: BoundLogger + snapshot_timeout = 90 - def __init__(self, config: dict, log: BoundLogger): - self.pool = config["pool"] - self.image = config["image"] - self.always_full = config.get("full-always", False) + def __init__( + self, + pool: str, + image: str, + log: BoundLogger, + vm: Optional[str] = None, + consul_acl_token: Optional[str] = None, + always_full: bool = False, + ): + self.pool = pool + self.image = image + self.always_full = always_full + self.vm = vm + self.consul_acl_token = consul_acl_token self.log = log.bind(subsystem="ceph") self.rbd = RBDClient(self.log) - self.vm = config["vm"] - self.consul_acl_token = config.get("consul_acl_token") + + @classmethod + def from_config(cls, config: dict, log: BoundLogger) -> "CephRBD": + return cls( + config["pool"], + config["image"], + log, + config.get("vm"), + config.get("consul_acl_token"), + config.get("full-always", False), + ) def ready(self) -> bool: """Check whether the source can be backed up. @@ -357,11 +393,68 @@ def __enter__(self): self.create_snapshot(snapname) return self - def create_snapshot(self, snapname: str) -> None: - """An overridable method to allow different ways of creating the - snapshot. 
- """ - self.rbd.snap_create(self._image_name + "@" + snapname) + def create_snapshot(self, name: str) -> None: + if not self.consul_acl_token or not self.vm: + self.rbd.snap_create(self._image_name + "@" + name) + return + + consul = consulate.Consul(token=self.consul_acl_token) + snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) + self.log.info( + "creating-snapshot", + snapshot_name=name, + snapshot_key=snapshot_key, + ) + + consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} + + time.sleep(3) + try: + timeout = TimeOut( + self.snapshot_timeout, interval=2, raise_on_timeout=True + ) + while timeout.tick(): + for snapshot in self.rbd.snap_ls(self._image_name): + if snapshot["name"] == name: + return + except TimeOutError: + # The VM might have been shut down. Try doing a regular Ceph + # snapshot locally. + self.rbd.snap_create(self._image_name + "@" + name) + except KeyboardInterrupt: + raise + finally: + # In case the snapshot still gets created: the general snapshot + # deletion code in ceph/source will clean up unused backy snapshots + # anyway. However, we need to work a little harder to delete old + # snapshot requests, otherwise we've sometimes seen those not + # getting deleted and then re-created all the time. + for key in list(consul.kv.find("snapshot/")): + try: + s = consul.kv[key] + except KeyError: + continue + try: + s = json.loads(s) + except json.decoder.JSONDecodeError: + # Clean up garbage. + self.log.warning( + "create-snapshot-removing-garbage-request", + snapshot_key=key, + ) + del consul.kv[key] + if s["vm"] != self.vm: + continue + # The knowledge about the `backy-` prefix isn't properly + # encapsulated here. + if s["snapshot"].startswith("backy-"): + self.log.info( + "create-snapshot-removing-request", + vm=s["vm"], + snapshot_name=s["snapshot"], + snapshot_key=key, + ) + del consul.kv[key] @property def _image_name(self) -> str: @@ -390,57 +483,33 @@ def get_parent(self) -> Optional[Revision]: # Ok, it's trusted and we have a snapshot. Let's do a diff. return parent - def diff(self, target: "RbdSource", parent: Revision) -> None: + def diff(self, target: File, parent: Revision) -> None: self.log.info("diff") snap_from = "backy-" + parent.uuid snap_to = "backy-" + self.revision.uuid s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) - with s as source, target.open(self.revision, parent) as target_: - bytes = source.integrate(target_, snap_from, snap_to) + with s as source: + source.integrate(target, snap_from, snap_to) self.log.info("diff-integration-finished") - self.revision.stats["bytes_written"] = bytes - - # TMP Gather statistics to see where to optimize - from backy.rbd.chunked.chunk import chunk_stats - - self.revision.stats["chunk_stats"] = chunk_stats - def full(self, target: File) -> None: self.log.info("full") s = self.rbd.export( "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) ) - copied = 0 - with s as source, target as target_: - while True: - buf = source.read(4 * backy.utils.MiB) - if not buf: - break - target_.write(buf) - copied += len(buf) - self.revision.stats["bytes_written"] = copied - - # TMP Gather statistics to see if we actually are aligned. 
- from backy.rbd.chunked.chunk import chunk_stats + with s as source: + while buf := source.read(4 * backy.utils.MiB): + target.write(buf) - self.revision.stats["chunk_stats"] = chunk_stats - - def verify(self, target: File, quarantine: QuarantineStore) -> bool: + def verify(self, target: File) -> Optional[ChunkMismatchReport]: s = self.rbd.image_reader( "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) ) self.revision.stats["ceph-verification"] = "partial" - with s as source, target as target_: + with s as source: self.log.info("verify") - return backy.utils.files_are_roughly_equal( - source, - target_, - report=lambda s, t, o: quarantine.add_report( - QuarantineReport(s, t, o) - ), - ) + return backy.utils.files_are_roughly_equal(source, cast(IO, target)) def _delete_old_snapshots(self) -> None: # Clean up all snapshots except the one for the most recent valid @@ -473,62 +542,3 @@ def _delete_old_snapshots(self) -> None: "delete-old-snapshot-failed", snapshot_name=snapshot["name"], ) - - def create_snapshot(self, name: str) -> None: - consul = consulate.Consul(token=self.consul_acl_token) - snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) - self.log.info( - "creating-snapshot", - snapshot_name=name, - snapshot_key=snapshot_key, - ) - - consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} - - time.sleep(3) - try: - timeout = TimeOut( - self.snapshot_timeout, interval=2, raise_on_timeout=True - ) - while timeout.tick(): - for snapshot in self.rbd.snap_ls(self._image_name): - if snapshot["name"] == name: - return - except TimeOutError: - # The VM might have been shut down. Try doing a regular Ceph - # snapshot locally. - super(FlyingCircusRootDisk, self).create_snapshot(name) - except KeyboardInterrupt: - raise - finally: - # In case the snapshot still gets created: the general snapshot - # deletion code in ceph/source will clean up unused backy snapshots - # anyway. However, we need to work a little harder to delete old - # snapshot requests, otherwise we've sometimes seen those not - # getting deleted and then re-created all the time. - for key in list(consul.kv.find("snapshot/")): - try: - s = consul.kv[key] - except KeyError: - continue - try: - s = json.loads(s) - except json.decoder.JSONDecodeError: - # Clean up garbage. - self.log.warning( - "create-snapshot-removing-garbage-request", - snapshot_key=key, - ) - del consul.kv[key] - if s["vm"] != self.vm: - continue - # The knowledge about the `backy-` prefix isn't properly - # encapsulated here. 
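# This entire second create_snapshot() definition -- the old
# FlyingCircusRootDisk override, as the super() call above shows -- is
# dropped: the unified CephRBD.create_snapshot() earlier in this patch
# covers both cases and falls back to a plain `rbd snap create` when no
# consul_acl_token/vm is configured.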
- if s["snapshot"].startswith("backy-"): - self.log.info( - "create-snapshot-removing-request", - vm=s["vm"], - snapshot_name=s["snapshot"], - snapshot_key=key, - ) - del consul.kv[key] diff --git a/src/backy/rbd/tests/conftest.py b/src/backy/rbd/tests/conftest.py index 953bbbf7..47ee82e8 100644 --- a/src/backy/rbd/tests/conftest.py +++ b/src/backy/rbd/tests/conftest.py @@ -4,8 +4,25 @@ import pytest -import backy.rbd.sources.ceph -from backy.rbd.sources.ceph.rbd import RBDClient +import backy.rbd.rbd +from backy.rbd.source import RBDClient, RBDSource +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule + + +@pytest.fixture +def repository(tmp_path, log): + repo = Repository(tmp_path, RBDSource, Schedule(), log) + repo.connect() + return repo + + +def create_rev(repository, tags) -> Revision: + r = Revision.create(repository, tags, repository.log) + r.materialize() + repository.scan() + return repository.find_by_uuid(r.uuid) class CephCLIBase: @@ -66,7 +83,7 @@ def prep_cmdline(arg): arg = "version" return arg - cmdline = map(prep_cmdline, cmdline) + cmdline = list(map(prep_cmdline, cmdline)) args = self.parser.parse_args(cmdline) func = getattr(self, args.func) args = dict(args._get_kwargs()) @@ -81,7 +98,7 @@ def map(self, snapspec, read_only): # implementation restriction: `rbd unmap` takes imagespecs, snapspecs, or devices # as args, AFAIK we only use devices as args in backy for now - def unmap(self, device, read_only): + def unmap(self, device): ... def showmapped(self, format): @@ -245,7 +262,7 @@ def unmap(self, device): @pytest.fixture(params=[CephJewelCLI, CephLuminousCLI, CephNautilusCLI]) def rbdclient(request, tmp_path, monkeypatch, log): monkeypatch.setattr( - backy.rbd.sources.ceph, "CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF", True + backy.rbd.rbd, "CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF", True ) client = RBDClient(log) diff --git a/src/backy/rbd/tests/test_backup.py b/src/backy/rbd/tests/test_backup.py deleted file mode 100644 index 5e9263bd..00000000 --- a/src/backy/rbd/tests/test_backup.py +++ /dev/null @@ -1,73 +0,0 @@ -import os.path -import subprocess -from unittest import mock - -from backy.rbd.chunked import ChunkedFileBackend -from backy.rbd.conftest import create_rev -from backy.rbd.sources.file import File -from backy.utils import CHUNK_SIZE - - -def test_config(rbdrepository, tmp_path): - assert rbdrepository.path == tmp_path - assert isinstance(rbdrepository.source, File) - assert rbdrepository.source.filename == "input-file" - - -def test_restore_target(rbdrepository, log): - source = "input-file" - target = "restore.img" - with open(source, "wb") as f: - f.write(b"volume contents\n") - r = create_rev(rbdrepository, {"daily"}) - rbdrepository.backup(r.uuid) - rbdrepository.restore(r.uuid, target) - with open(source, "rb") as s, open(target, "rb") as t: - assert s.read() == t.read() - - -def test_restore_stdout(rbdrepository, capfd, log): - source = "input-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - r = create_rev(rbdrepository, {"daily"}) - rbdrepository.backup(r.uuid) - rbdrepository.restore(r.uuid, "-") - assert not os.path.exists("-") - out, err = capfd.readouterr() - assert "volume contents\n" == out - - -def test_restore_backy_extract(rbdrepository, monkeypatch, log): - check_output = mock.Mock(return_value="backy-extract 1.1.0") - monkeypatch.setattr(subprocess, "check_output", check_output) - rbdrepository.restore_backy_extract = mock.Mock() - source = "input-file" - with 
open(source, "wb") as f: - f.write(b"a" * CHUNK_SIZE) - r = create_rev(rbdrepository, {"daily"}) - rbdrepository.backup(r.uuid) - rbdrepository.restore(r.uuid, "restore.img") - check_output.assert_called() - rbdrepository.restore_backy_extract.assert_called_once_with( - rbdrepository.find("0"), "restore.img" - ) - - -def test_backup_corrupted(rbdrepository, log): - source = "input-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - r = create_rev(rbdrepository, {"daily"}) - rbdrepository.backup(r.uuid) - - store = ChunkedFileBackend(rbdrepository.history[0], log).store - chunk_path = store.chunk_path(next(iter(store.seen))) - os.chmod(chunk_path, 0o664) - with open(chunk_path, "wb") as f: - f.write(b"invalid") - r2 = create_rev(rbdrepository, {"daily"}) - rbdrepository.backup(r2.uuid) - - assert rbdrepository.history == [] - assert not os.path.exists(chunk_path) diff --git a/src/backy/rbd/tests/test_backy-rbd.py b/src/backy/rbd/tests/test_backy-rbd.py deleted file mode 100644 index 4946ef45..00000000 --- a/src/backy/rbd/tests/test_backy-rbd.py +++ /dev/null @@ -1,179 +0,0 @@ -import os -import subprocess - -import pytest - -from backy.ext_deps import BACKY_RBD_CMD, BASH -from backy.rbd import RbdSource -from backy.rbd.conftest import create_rev -from backy.revision import Revision -from backy.tests import Ellipsis - - -def generate_test_data(target, size, marker): - f = open(target, "wb") - block = 8 * 1024 - for chunk in range(size // block): - f.write(marker * block) - f.write(marker * (size % block)) - f.close() - - -def test_smoketest_internal(tmp_path, log): - # These copies of data are intended to be different versions of the same - # file. - source1 = str(tmp_path / "image1.qemu") - generate_test_data(source1, 2 * 1024**2, b"1") - source2 = str(tmp_path / "image2.qemu") - generate_test_data(source2, 2 * 1024**2, b"2") - source3 = str(tmp_path / "image3.qemu") - generate_test_data(source3, 2 * 1024**2, b"3") - - backup_dir = tmp_path / "image1.backup" - os.mkdir(str(backup_dir)) - with open(str(backup_dir / "config"), "wb") as f: - f.write( - ( - "{'source': {'type': 'file', 'filename': '%s'}," - "'schedule': {'daily': {'interval': '1d', 'keep': 7}}}" - % source1 - ).encode("utf-8") - ) - repository = RbdSource(backup_dir, log) - - # Backup first state - rev1 = create_rev(repository, {"manual:test"}) - repository.backup(rev1.uuid) - - # Restore first state from the newest revision - restore_target = str(tmp_path / "image1.restore") - repository.restore(rev1.uuid, restore_target) - with pytest.raises(IOError): - open(repository.history[-1].filename, "wb") - with pytest.raises(IOError): - open(repository.history[-1].info_filename, "wb") - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - # Backup second state - repository.source.filename = source2 - rev2 = create_rev(repository, {"test"}) - repository.backup(rev2.uuid) - assert len(repository.history) == 2 - - # Restore second state from second backup which is the newest at position 0 - repository.restore(rev2.uuid, restore_target) - d1 = open(source2, "rb").read() - d2 = open(restore_target, "rb").read() - assert d1 == d2 - - # Our original backup is now at position 1. Lets restore that again. 
- repository.restore(rev1.uuid, restore_target) - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - # Backup second state again - repository.source.filename = source2 - rev3 = create_rev(repository, {"manual:test"}) - repository.backup(rev3.uuid) - assert len(repository.history) == 3 - - # Restore image2 from its most recent at position 0 - repository.restore(rev3.uuid, restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - # Restore image2 from its previous backup, now at position 1 - repository.restore(rev2.uuid, restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - # Our original backup is now at position 2. Lets restore that again. - repository.restore(rev1.uuid, restore_target) - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - # Backup third state - repository.source.filename = source3 - rev4 = create_rev(repository, {"test"}) - repository.backup(rev4.uuid) - assert len(repository.history) == 4 - - # Restore image3 from the most curent state - repository.restore(rev4.uuid, restore_target) - assert open(source3, "rb").read() == open(restore_target, "rb").read() - - # Restore image2 from position 1 and 2 - repository.restore(rev3.uuid, restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - repository.restore(rev2.uuid, restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - # Restore image1 from position 3 - repository.restore(rev1.uuid, restore_target) - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - -@pytest.mark.slow -def test_smoketest_external(): - output = subprocess.check_output( - [BASH, os.path.dirname(__file__) + "/smoketest.sh"], - env=os.environ | {"BACKY_RBD_CMD": BACKY_RBD_CMD}, - ) - output = output.decode("utf-8") - assert ( - Ellipsis( - """\ -Using /... as workspace. -Generating Test Data.. Done. -Backing up img_state1.img. Done. -Backing up img_state1.img with unknown tag. Done. -Restoring img_state1.img from level 0. Done. -Diffing restore_state1.img against img_state1.img. Success. -Backing up img_state2.img. Done. -Restoring img_state2.img from level 0. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state1.img from level 1. Done. -Diffing restore_state1.img against img_state1.img. Success. -Backing up img_state2.img again. Done. -Restoring img_state2.img from level 0. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state2.img from level 1. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state1.img from level 2. Done. -Diffing restore_state1.img against img_state1.img. Success. -Backing up img_state3.img. Done. -Restoring img_state3.img from level 0. Done. -Diffing restore_state3.img against img_state3.img. Success. -Restoring img_state2.img from level 1. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state2.img from level 2. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state1.img from level 3. Done. -Diffing restore_state1.img against img_state1.img. Success. -┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ -┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ -┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ -┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ -│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... 
│ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ daily │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... │ │ │ │ │ │ │ -└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ -4 revisions containing 2.0 MiB data (estimated) -┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ -┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ -┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ -┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ -│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... │ │ │ │ │ │ │ -└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ -3 revisions containing 1.5 MiB data (estimated) -""" - ) - == output - ) diff --git a/src/backy/rbd/tests/test_ceph.py b/src/backy/rbd/tests/test_ceph.py new file mode 100644 index 00000000..ee261aad --- /dev/null +++ b/src/backy/rbd/tests/test_ceph.py @@ -0,0 +1,384 @@ +import datetime +import io +import json +import subprocess +import time +from pathlib import Path +from unittest import mock + +import consulate +import pytest + +import backy.utils +from backy.rbd import RBDSource +from backy.rbd.rbd import RBDDiffV1 +from backy.rbd.source import CephRBD +from backy.revision import Revision + +BLOCK = backy.utils.PUNCH_SIZE + +with open(Path(__file__).parent / "nodata.rbddiff", "rb") as f: + SAMPLE_RBDDIFF = f.read() + + +@pytest.fixture +def check_output(monkeypatch): + check_output = mock.Mock() + check_output.return_value = b"{}" + monkeypatch.setattr(subprocess, "check_output", check_output) + return check_output + + +@pytest.fixture +def ceph_rbd(rbdclient, nosleep, log): + """Provides a CephRBD object configured for image pool/test, with rbd + being mocked away and allowing snapshots on that image.""" + ceph_rbd = CephRBD("test", "foo", log) + # rbdclient mock setup: + rbdclient._ceph_cli._register_image_for_snaps("test/foo") + ceph_rbd.rbd = rbdclient + return ceph_rbd + + +@pytest.fixture +def rbdsource(ceph_rbd, repository, log): + return RBDSource(repository, ceph_rbd, log) + + +@pytest.fixture +def nosleep(monkeypatch): + monkeypatch.setattr(time, "sleep", lambda x: None) + + +def test_no_consul(ceph_rbd, monkeypatch): + consul_class = mock.Mock() + monkeypatch.setattr(consulate, "Consul", consul_class) + + ceph_rbd.create_snapshot("asdf") + + consul_class.assert_not_called() + + +def test_assign_revision(nosleep, log): + ceph_rbd = CephRBD("test", "foo", log) + revision = mock.Mock() + context_manager = ceph_rbd(revision) + assert context_manager.revision is revision + + +def test_context_manager(ceph_rbd, repository, log): + """The imagesource context manager around a backup revision must create a + corresponding snapshot at enter, and clean up at exit.""" + + revision = Revision.create(repository, set(), log, uuid="1") + with ceph_rbd(revision): + assert ceph_rbd.rbd.snap_ls("test/foo")[0]["name"] == "backy-1" + + assert len(ceph_rbd.rbd.snap_ls("test/foo")) == 0 + + +def test_context_manager_cleans_out_snapshots(ceph_rbd, repository, log): + """The imagesource context manager cleans up unexpected backy snapshot revisions. 
+ Snapshots without the prefix 'backy-' are left untouched.""" + + # snaps without backy- prefix are left untouched + ceph_rbd.rbd.snap_create("test/foo@someother") + # unexpected revision snapshots are cleaned + ceph_rbd.rbd.snap_create("test/foo@backy-2") + + revision = Revision.create(repository, set(), log, uuid="1") + revision.materialize() + repository.scan() + with ceph_rbd(revision): + pass + + assert ceph_rbd.rbd.snap_ls("test/foo") == [ + { + "id": 86925, + "name": "someother", + "protected": "false", + "size": 32212254720, + "timestamp": "Sun Feb 12 18:35:18 2023", + }, + { + "id": 86925, + "name": "backy-1", + "protected": "false", + "size": 32212254720, + "timestamp": "Sun Feb 12 18:35:18 2023", + }, + ] + + +def test_choose_full_without_parent(ceph_rbd, repository, log): + """When backing up a revision without a parent, a full backup needs to happen. + The diff function must not be called.""" + + revision = Revision.create(repository, set(), log) + + with ceph_rbd(revision) as s: + assert not s.get_parent() + + +def test_choose_full_without_snapshot(ceph_rbd, repository, log): + """When backing up a revision with an immediate parent that has no corresponding + snapshot, that parent must be ignored and a full backup has to be made. + The diff function must not be called.""" + + revision1 = Revision.create(repository, set(), log) + revision1.materialize() + + repository.scan() + + revision2 = Revision.create(repository, set(), log) + + with ceph_rbd(revision2) as s: + assert not s.get_parent() + + +def test_choose_diff_with_snapshot(ceph_rbd, repository, log): + """In an environment where a parent revision exists and has a snapshot, both + revisions shall be diffed.""" + + revision1 = Revision.create(repository, set(), log, uuid="a1") + revision1.materialize() + + # part of test setup: we check backy's behavior when a previous version not only + # exists, but also has a snapshot + ceph_rbd.rbd.snap_create("test/foo@backy-a1") + + repository.scan() + + revision2 = Revision.create(repository, set(), log) + + with ceph_rbd(revision2) as s: + assert s.get_parent().uuid == revision1.uuid + + +def test_diff_backup(ceph_rbd, rbdsource, repository, tmp_path, log): + """When doing a diff backup between two revisions with snapshot, the RBDDiff needs + to be called properly, a snapshot for the new revision needs to be created and the + snapshot of the previous revision needs to be removed after the successfull backup.""" + + parent = Revision.create( + repository, set(), log, uuid="ed968696-5ab0-4fe0-af1c-14cadab44661" + ) + parent.materialize() + + # Those revision numbers are taken from the sample snapshot and need + # to match, otherwise our diff integration will (correctly) complain. + revision = Revision.create( + repository, set(), log, uuid="f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" + ) + revision.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) + + with rbdsource.open(parent) as f: + f.write(b"asdf") + + repository.scan() + revision.materialize() + + # test setup: ensure that previous revision has a snapshot. 
It needs to be removed + # by the backup process + ceph_rbd.rbd.snap_create( + "test/foo@backy-ed968696-5ab0-4fe0-af1c-14cadab44661" + ) + + with mock.patch("backy.rbd.rbd.RBDClient.export_diff") as export: + export.return_value = mock.MagicMock() + export.return_value.__enter__.return_value = RBDDiffV1( + io.BytesIO(SAMPLE_RBDDIFF) + ) + with ceph_rbd(revision), rbdsource.open(revision) as f: + ceph_rbd.diff(f, revision.get_parent()) + repository.history.append(revision) + export.assert_called_with( + "test/foo@backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802", + "backy-ed968696-5ab0-4fe0-af1c-14cadab44661", + ) + + current_snaps = ceph_rbd.rbd.snap_ls("test/foo") + assert len(current_snaps) == 1 + assert ( + current_snaps[0]["name"] == "backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" + ) + + +def test_full_backup(ceph_rbd, rbdsource, repository, tmp_path, log): + # Those revision numbers are taken from the sample snapshot and need + # to match, otherwise our diff integration will (correctly) complain. + revision = Revision.create(repository, set(), log, uuid="a0") + revision.materialize() + repository.scan() + + with mock.patch("backy.rbd.rbd.RBDClient.export") as export: + export.return_value = io.BytesIO(b"Han likes Leia.") + with ceph_rbd(revision), rbdsource.open(revision) as f: + ceph_rbd.full(f) + export.assert_called_with("test/foo@backy-a0") + + # the corresponding snapshot for revision a0 is created by the backup process + assert ceph_rbd.rbd.snap_ls("test/foo")[0]["name"] == "backy-a0" + + with rbdsource.open(revision) as f: + assert f.read() == b"Han likes Leia." + + # Now make another full backup. This overwrites the first. + revision2 = Revision.create(repository, set(), log, uuid="a1") + revision2.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) + revision2.materialize() + repository.scan() + + with mock.patch("backy.rbd.rbd.RBDClient.export") as export: + export.return_value = io.BytesIO(b"Han loves Leia.") + with ceph_rbd(revision2), rbdsource.open(revision2) as f: + ceph_rbd.full(f) + + with rbdsource.open(revision2) as f: + assert f.read() == b"Han loves Leia." + + current_snaps = ceph_rbd.rbd.snap_ls("test/foo") + assert len(current_snaps) == 1 + assert current_snaps[0]["name"] == "backy-a1" + + +def test_full_backup_integrates_changes( + ceph_rbd, rbdsource, repository, tmp_path, log +): + # The backup source changes between two consecutive full backups. Both + # backup images should reflect the state of the source at the time the + # backup was run. This test is here to detect regressions while optimizing + # the full backup algorithms (copying and applying deltas). 
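# The all-zero block in content1 below is presumably the key case: a copy
# optimization that skips zero/sparse regions would leave rev0's "C" data
# in place, which differing nonzero data alone would not catch.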
+ content0 = BLOCK * b"A" + BLOCK * b"B" + BLOCK * b"C" + BLOCK * b"D" + content1 = BLOCK * b"A" + BLOCK * b"X" + BLOCK * b"\0" + BLOCK * b"D" + + rev0 = Revision.create(repository, set(), log) + rev0.materialize() + repository.scan() + + rev1 = Revision.create(repository, set(), log) + rev1.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) + rev1.materialize() + + # check fidelity + for content, rev in [(content0, rev0), (content1, rev1)]: + with mock.patch("backy.rbd.rbd.RBDClient.export") as export: + export.return_value = io.BytesIO(content) + with ceph_rbd(rev), rbdsource.open(rev) as target: + ceph_rbd.full(target) + export.assert_called_with("test/foo@backy-{}".format(rev.uuid)) + + with rbdsource.open(rev) as f: + assert content == f.read() + + +def test_verify_fail(ceph_rbd, rbdsource, repository, tmp_path, log): + # Those revision numbers are taken from the sample snapshot and need + # to match, otherwise our diff integration will (correctly) complain. + revision = Revision.create(repository, set(), log) + revision.materialize() + + repository.scan() + + rbd_source = str(tmp_path / "-dev-rbd0") + with open(rbd_source, "w") as f: + f.write("Han likes Leia.") + + with rbdsource.open(revision) as f: + f.write(b"foobar") + # The chunked store has false data, so this needs to be detected. + with ceph_rbd(revision), rbdsource.open(revision) as target: + assert ceph_rbd.verify(target) + + +def test_verify(ceph_rbd, rbdsource, repository, tmp_path, log): + # Those revision numbers are taken from the sample snapshot and need + # to match, otherwise our diff integration will (correctly) complain. + revision = Revision.create(repository, set(), log, uuid="a0") + revision.materialize() + + repository.scan() + + rbd_source = ceph_rbd.rbd.map("test/foo@backy-a0")["device"] + with open(rbd_source, "wb") as f: + f.write(b"Han likes Leia.") + ceph_rbd.rbd.unmap(rbd_source) + + with rbdsource.open(revision) as f: + f.write(b"Han likes Leia.") + f.flush() + + with ceph_rbd(revision), rbdsource.open(revision) as target: + assert not ceph_rbd.verify(target) + + +@pytest.fixture +def fcrd(log): + return CephRBD( + "test", + "test01.root", + log, + "test01", + "12345", + ) + + +def test_flyingcircus_source(fcrd): + assert fcrd.pool == "test" + assert fcrd.image == "test01.root" + assert fcrd.vm == "test01" + assert fcrd.consul_acl_token == "12345" + + +@pytest.mark.slow +def test_flyingcircus_consul_interaction(monkeypatch, fcrd): + consul_class = mock.Mock() + consul = consul_class() + consul.kv = ConsulKVDict() + monkeypatch.setattr(consulate, "Consul", consul_class) + + check_output = mock.Mock() + check_output.side_effect = ["[]", '[{"name": "asdf"}]'] + monkeypatch.setattr(subprocess, "check_output", check_output) + fcrd.create_snapshot("asdf") + + +class ConsulKVDict(dict): + def __setitem__(self, k, v): + if not isinstance(v, bytes): + v = json.dumps(v) + super(ConsulKVDict, self).__setitem__(k, v) + + def find(self, prefix): + for key in self: + if key.startswith(prefix): + yield key + + +@pytest.mark.slow +def test_flyingcircus_consul_interaction_timeout(monkeypatch, fcrd): + consul_class = mock.Mock() + consul = consul_class() + consul.kv = ConsulKVDict() + monkeypatch.setattr(consulate, "Consul", consul_class) + + check_output = mock.Mock() + check_output.side_effect = [ + '[{"name": "bsdf"}]', + "[]", + "[]", + "[]", + "[]", + "[]", + ] + monkeypatch.setattr(subprocess, "check_output", check_output) + + fcrd.snapshot_timeout = 2 + fcrd.create_snapshot("asdf") + + assert 
check_output.call_args[0][0] == [ + "rbd", + "snap", + "create", + "test/test01.root@asdf", + ] diff --git a/src/backy/rbd/tests/test_ceph_source.py b/src/backy/rbd/tests/test_ceph_source.py deleted file mode 100644 index 13d7c32d..00000000 --- a/src/backy/rbd/tests/test_ceph_source.py +++ /dev/null @@ -1,346 +0,0 @@ -import datetime -import io -import os.path as p -import subprocess -import time -from unittest import mock - -import pytest - -import backy.utils -from backy.rbd.chunked import ChunkedFileBackend -from backy.rbd.sources import select_source -from backy.rbd.sources.ceph.source import CephRBD -from backy.revision import Revision - -BLOCK = backy.utils.PUNCH_SIZE - -with open(p.join(p.dirname(__file__), "nodata.rbddiff"), "rb") as f: - SAMPLE_RBDDIFF = f.read() - - -@pytest.fixture -def check_output(monkeypatch): - check_output = mock.Mock() - check_output.return_value = b"{}" - monkeypatch.setattr(subprocess, "check_output", check_output) - return check_output - - -@pytest.fixture -def ceph_rbd_imagesource(rbdclient, rbdrepository, nosleep, log): - """Provides a CephRBD object configured for image pool/test, with rbd - being mocked away and allowing snapshots on that image.""" - source = CephRBD(dict(pool="test", image="foo"), rbdrepository, log) - # rbdclient mock setup: - rbdclient._ceph_cli._register_image_for_snaps("test/foo") - source.rbd = rbdclient - return source - - -@pytest.fixture -def nosleep(monkeypatch): - monkeypatch.setattr(time, "sleep", lambda x: None) - - -def test_select_ceph_source(): - assert select_source("ceph-rbd") == CephRBD - - -def test_assign_revision(nosleep, log): - source = CephRBD(dict(pool="test", image="foo"), mock.Mock(), log) - revision = mock.Mock() - context_manager = source(revision) - assert context_manager.revision is revision - - -def test_context_manager(rbdrepository, ceph_rbd_imagesource, log): - """The imagesource context manager around a backup revision must create a - corresponding snapshot at enter, and clean up at exit.""" - source = ceph_rbd_imagesource - - revision = Revision.create(rbdrepository, set(), log, uuid="1") - with source(revision): - assert source.rbd.snap_ls("test/foo")[0]["name"] == "backy-1" - - assert len(source.rbd.snap_ls("test/foo")) == 0 - - -def test_context_manager_cleans_out_snapshots( - ceph_rbd_imagesource, rbdrepository, log -): - """The imagesource context manager cleans up unexpected backy snapshot revisions. - Snapshots without the prefix 'backy-' are left untouched.""" - source = ceph_rbd_imagesource - - # snaps without backy- prefix are left untouched - source.rbd.snap_create("test/foo@someother") - # unexpected revision snapshots are cleaned - source.rbd.snap_create("test/foo@backy-2") - - revision = Revision.create(rbdrepository, set(), log, uuid="1") - with source(revision): - revision.materialize() - rbdrepository.scan() - - assert source.rbd.snap_ls("test/foo") == [ - { - "id": 86925, - "name": "someother", - "protected": "false", - "size": 32212254720, - "timestamp": "Sun Feb 12 18:35:18 2023", - }, - { - "id": 86925, - "name": "backy-1", - "protected": "false", - "size": 32212254720, - "timestamp": "Sun Feb 12 18:35:18 2023", - }, - ] - - -def test_choose_full_without_parent(ceph_rbd_imagesource, rbdrepository, log): - """When backing up a revision without a parent, a full backup needs to happen. 
- The diff function must not be called.""" - source = ceph_rbd_imagesource - - source.diff = mock.Mock() - source.full = mock.Mock() - - revision = Revision.create(rbdrepository, set(), log) - - with source(revision) as s: - s.backup(ChunkedFileBackend(revision, log)) - - assert not source.diff.called - assert source.full.called - - -def test_choose_full_without_snapshot(ceph_rbd_imagesource, rbdrepository, log): - """When backing up a revision with an immediate parent that has no corresponding - snapshot, that parent must be ignored and a full backup has to be made. - The diff function must not be called.""" - source = ceph_rbd_imagesource - - source.diff = mock.Mock() - source.full = mock.Mock() - - revision1 = Revision.create(rbdrepository, set(), log) - revision1.materialize() - - rbdrepository.scan() - - revision2 = Revision.create(rbdrepository, set(), log) - - with source(revision2) as s: - s.backup(ChunkedFileBackend(revision2, log)) - - assert not source.diff.called - assert source.full.called - - -def test_choose_diff_with_snapshot(ceph_rbd_imagesource, rbdrepository, log): - """In an environment where a parent revision exists and has a snapshot, both - revisions shall be diffed.""" - source = ceph_rbd_imagesource - - source.diff = mock.Mock() - source.full = mock.Mock() - - revision1 = Revision.create(rbdrepository, set(), log, uuid="a1") - revision1.materialize() - - # part of test setup: we check backy's behavior when a previous version not only - # exists, but also has a snapshot - source.rbd.snap_create("test/foo@backy-a1") - - rbdrepository.scan() - - revision2 = Revision.create(rbdrepository, set(), log) - - with source(revision2) as s: - s.backup(ChunkedFileBackend(revision2, log)) - - assert source.diff.called - assert not source.full.called - - -def test_diff_backup(ceph_rbd_imagesource, rbdrepository, tmp_path, log): - """When doing a diff backup between two revisions with snapshot, the RBDDiff needs - to be called properly, a snapshot for the new revision needs to be created and the - snapshot of the previous revision needs to be removed after the successfull backup.""" - from backy.rbd.sources.ceph.diff import RBDDiffV1 - - source = ceph_rbd_imagesource - - parent = Revision.create( - rbdrepository, set(), log, uuid="ed968696-5ab0-4fe0-af1c-14cadab44661" - ) - parent.materialize() - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create( - rbdrepository, set(), log, uuid="f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" - ) - revision.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - - with ChunkedFileBackend(parent, log).open("wb") as f: - f.write(b"asdf") - - rbdrepository.scan() - revision.materialize() - - # test setup: ensure that previous revision has a snapshot. 
It needs to be removed - # by the backup process - source.rbd.snap_create( - "test/foo@backy-ed968696-5ab0-4fe0-af1c-14cadab44661" - ) - - with mock.patch( - "backy.rbd.sources.ceph.rbd.RBDClient.export_diff" - ) as export: - export.return_value = mock.MagicMock() - export.return_value.__enter__.return_value = RBDDiffV1( - io.BytesIO(SAMPLE_RBDDIFF) - ) - with source(revision): - source.diff( - ChunkedFileBackend(revision, log), revision.get_parent() - ) - rbdrepository.history.append(revision) - export.assert_called_with( - "test/foo@backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802", - "backy-ed968696-5ab0-4fe0-af1c-14cadab44661", - ) - - current_snaps = source.rbd.snap_ls("test/foo") - assert len(current_snaps) == 1 - assert ( - current_snaps[0]["name"] == "backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" - ) - - -def test_full_backup(ceph_rbd_imagesource, rbdrepository, tmp_path, log): - source = ceph_rbd_imagesource - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(rbdrepository, set(), log, uuid="a0") - revision.materialize() - rbdrepository.scan() - - with mock.patch("backy.rbd.sources.ceph.rbd.RBDClient.export") as export: - export.return_value = io.BytesIO(b"Han likes Leia.") - backend = ChunkedFileBackend(revision, log) - with source(revision): - source.full(backend) - export.assert_called_with("test/foo@backy-a0") - - # the corresponding snapshot for revision a0 is created by the backup process - assert source.rbd.snap_ls("test/foo")[0]["name"] == "backy-a0" - - with backend.open("rb") as f: - assert f.read() == b"Han likes Leia." - - # Now make another full backup. This overwrites the first. - revision2 = Revision.create(rbdrepository, set(), log, uuid="a1") - revision2.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - revision2.materialize() - rbdrepository.scan() - - with mock.patch("backy.rbd.sources.ceph.rbd.RBDClient.export") as export: - export.return_value = io.BytesIO(b"Han loves Leia.") - backend = ChunkedFileBackend(revision2, log) - with source(revision2): - source.full(backend) - - with backend.open("rb") as f: - assert f.read() == b"Han loves Leia." - - current_snaps = source.rbd.snap_ls("test/foo") - assert len(current_snaps) == 1 - assert current_snaps[0]["name"] == "backy-a1" - - -def test_full_backup_integrates_changes( - ceph_rbd_imagesource, rbdrepository, tmp_path, log -): - # The backup source changes between two consecutive full backups. Both - # backup images should reflect the state of the source at the time the - # backup was run. This test is here to detect regressions while optimizing - # the full backup algorithms (copying and applying deltas). 
- source = ceph_rbd_imagesource - content0 = BLOCK * b"A" + BLOCK * b"B" + BLOCK * b"C" + BLOCK * b"D" - content1 = BLOCK * b"A" + BLOCK * b"X" + BLOCK * b"\0" + BLOCK * b"D" - - rev0 = Revision.create(rbdrepository, set(), log) - rev0.materialize() - rbdrepository.scan() - - rev1 = Revision.create(rbdrepository, set(), log) - rev1.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - rev1.materialize() - - # check fidelity - for content, rev in [(content0, rev0), (content1, rev1)]: - backend = ChunkedFileBackend(rev, log) - with mock.patch( - "backy.rbd.sources.ceph.rbd.RBDClient.export" - ) as export: - export.return_value = io.BytesIO(content) - with source(rev): - source.full(backend) - export.assert_called_with("test/foo@backy-{}".format(rev.uuid)) - - with backend.open("rb") as f: - assert content == f.read() - - -def test_verify_fail(rbdrepository, tmp_path, ceph_rbd_imagesource, log): - source = ceph_rbd_imagesource - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(rbdrepository, set(), log) - revision.materialize() - - rbdrepository.scan() - - rbd_source = str(tmp_path / "-dev-rbd0") - with open(rbd_source, "w") as f: - f.write("Han likes Leia.") - - backend = ChunkedFileBackend(revision, log) - with backend.open("wb") as f: - f.write(b"foobar") - # The backend has false data, so this needs to be detected. - with source(revision): - assert not source.verify(backend) - assert len(rbdrepository.quarantine.report_ids) == 1 - - -def test_verify(ceph_rbd_imagesource, rbdrepository, tmp_path, log): - source = ceph_rbd_imagesource - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. 
- revision = Revision.create(rbdrepository, set(), log, uuid="a0") - revision.materialize() - - rbdrepository.scan() - - rbd_source = source.rbd.map("test/foo@backy-a0")["device"] - with open(rbd_source, "wb") as f: - f.write(b"Han likes Leia.") - source.rbd.unmap(rbd_source) - - backend = ChunkedFileBackend(revision, log) - with backend.open("wb") as f: - f.write(b"Han likes Leia.") - f.flush() - - with source(revision): - assert source.verify(backend) diff --git a/src/backy/rbd/tests/test_diff.py b/src/backy/rbd/tests/test_diff.py index a926a24c..664d7963 100644 --- a/src/backy/rbd/tests/test_diff.py +++ b/src/backy/rbd/tests/test_diff.py @@ -1,11 +1,10 @@ import io -import os -import os.path import struct +from pathlib import Path import pytest -from backy.rbd.sources.ceph.diff import ( +from backy.rbd.rbd import ( Data, FromSnap, RBDDiffV1, @@ -254,11 +253,10 @@ def test_read_detects_wrong_record_type(tmp_path): def test_read_empty_diff(tmp_path): - diff = RBDDiffV1(open(os.path.dirname(__file__) + "/nodata.rbddiff", "rb")) + diff = RBDDiffV1(open(Path(__file__).parent / "nodata.rbddiff", "rb")) target = open(str(tmp_path / "foo"), "wb") diff.integrate( target, "backy-ed968696-5ab0-4fe0-af1c-14cadab44661", "backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802", - clean=False, ) diff --git a/src/backy/rbd/tests/test_fc_source.py b/src/backy/rbd/tests/test_fc_source.py deleted file mode 100644 index 7e39b664..00000000 --- a/src/backy/rbd/tests/test_fc_source.py +++ /dev/null @@ -1,88 +0,0 @@ -import json -import subprocess -from unittest import mock - -import consulate -import pytest - -from backy.rbd.sources import select_source -from backy.rbd.sources.flyingcircus.source import FlyingCircusRootDisk - - -@pytest.fixture -def fcrd(log, rbdrepository): - return FlyingCircusRootDisk( - { - "pool": "test", - "image": "test01.root", - "vm": "test01", - "consul_acl_token": "12345", - }, - rbdrepository, - log, - ) - - -def test_select_flyingcircus_source(): - assert select_source("flyingcircus") == FlyingCircusRootDisk - - -def test_flyingcircus_source(fcrd): - assert fcrd.pool == "test" - assert fcrd.image == "test01.root" - assert fcrd.vm == "test01" - assert fcrd.consul_acl_token == "12345" - - -@pytest.mark.slow -def test_flyingcircus_consul_interaction(monkeypatch, fcrd): - consul_class = mock.Mock() - consul = consul_class() - consul.kv = ConsulKVDict() - monkeypatch.setattr(consulate, "Consul", consul_class) - - check_output = mock.Mock() - check_output.side_effect = ["[]", '[{"name": "asdf"}]'] - monkeypatch.setattr(subprocess, "check_output", check_output) - fcrd.create_snapshot("asdf") - - -class ConsulKVDict(dict): - def __setitem__(self, k, v): - if not isinstance(v, bytes): - v = json.dumps(v) - super(ConsulKVDict, self).__setitem__(k, v) - - def find(self, prefix): - for key in self: - if key.startswith(prefix): - yield key - - -@pytest.mark.slow -def test_flyingcircus_consul_interaction_timeout(monkeypatch, fcrd): - consul_class = mock.Mock() - consul = consul_class() - consul.kv = ConsulKVDict() - monkeypatch.setattr(consulate, "Consul", consul_class) - - check_output = mock.Mock() - check_output.side_effect = [ - '[{"name": "bsdf"}]', - "[]", - "[]", - "[]", - "[]", - "[]", - ] - monkeypatch.setattr(subprocess, "check_output", check_output) - - fcrd.snapshot_timeout = 2 - fcrd.create_snapshot("asdf") - - assert check_output.call_args[0][0] == [ - "rbd", - "snap", - "create", - "test/test01.root@asdf", - ] diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py 
index c04afcf0..43930166 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -9,6 +9,10 @@ import backy.rbd from backy import utils from backy.rbd import main +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule +from backy.source import Source from backy.tests import Ellipsis @@ -21,6 +25,34 @@ def argv(): sys.argv = original +@pytest.fixture +def repository_on_disk(tmp_path, log): + with open(tmp_path / "config", "w", encoding="utf-8") as f: + f.write( + f""" +--- +path: "{tmp_path}" +schedule: + daily: + interval: 1d + keep: 7 +type: rbd +""" + ) + with open(tmp_path / "source.config", "w", encoding="utf-8") as f: + f.write( + """ +--- +type: rbd +pool: a +image: b +""" + ) + repo = Repository(tmp_path, Source, Schedule(), log) + repo.connect() + return repo + + def test_display_usage(capsys, argv): with pytest.raises(SystemExit) as exit: main() @@ -69,49 +101,90 @@ def test_verbose_logging(capsys, argv): def print_args(*args, return_value=None, **kw): - print(args) + print(", ".join(map(repr, args))) pprint.pprint(kw) return return_value -@pytest.mark.parametrize("success", [False, True]) -def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): - os.makedirs(tmp_path / "backy") - os.chdir(tmp_path / "backy") +@pytest.mark.parametrize( + ["fun", "args", "rv", "rc", "params"], + [ + ( + "backup", + ["asdf"], + 0, + 1, + [""], + ), + ( + "backup", + ["asdf"], + 1, + 0, + [""], + ), + ( + "restore", + ["asdf", "out.img"], + None, + 0, + [ + "", + "RestoreArgs(target='out.img', backend=)", + ], + ), + ( + "restore", + ["asdf", "--backend", "python", "out.img"], + None, + 0, + [ + "", + "RestoreArgs(target='out.img', backend=)", + ], + ), + ("gc", [], None, 0, []), + ( + "verify", + ["asdf"], + None, + 0, + [""], + ), + ], +) +def test_call_fun( + fun, + args, + rv, + rc, + params, + repository_on_disk, + tmp_path, + capsys, + argv, + monkeypatch, + log, +): + os.chdir(tmp_path) - with open(tmp_path / "backy" / "config", "wb") as f: - f.write( - """ ---- -schedule: - daily: - interval: 1d - keep: 7 -source: - type: file - filename: {} -""".format( - __file__ - ).encode( - "utf-8" - ) - ) + Revision(repository_on_disk, log, uuid="asdf").materialize() monkeypatch.setattr( - backy.rbd.RbdSource, - "backup", - partialmethod(print_args, return_value=success), + backy.rbd.source.RBDSource, + fun, + partialmethod(print_args, return_value=rv), ) - argv.extend(["-v", "backup", "asdf"]) + argv.extend(["-v", fun, *args]) utils.log_data = "" with pytest.raises(SystemExit) as exit: main() out, err = capsys.readouterr() assert ( Ellipsis( - """\ -(, 'asdf') -{} + f"""\ +{", ".join(["", *params])} +{{}} """ ) == out @@ -119,29 +192,30 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): assert ( Ellipsis( f"""\ -... D - command/invoked args='... -v backup asdf' -... D - quarantine/scan entries=0 -... D - command/return-code code={int(not success)} +... D - command/invoked args='... -v {" ".join([fun, *args])}' +... D - repo/scan-reports entries=0 +... I - chunked-store/to-v2 \n\ +... I - chunked-store/to-v2-finished \n\ +... 
D - command/return-code code={rc} """ ) == utils.log_data ) - assert exit.value.code == int(not success) + assert exit.value.code == rc -# TODO: test call restore, verify, gc def test_call_unexpected_exception( - capsys, rbdrepository, argv, monkeypatch, log, tmp_path + capsys, repository_on_disk, argv, monkeypatch, log, tmp_path ): def do_raise(*args, **kw): raise RuntimeError("test") - monkeypatch.setattr(backy.rbd.RbdSource, "gc", do_raise) + monkeypatch.setattr(backy.rbd.RBDSource, "gc", do_raise) import os monkeypatch.setattr(os, "_exit", lambda x: None) - argv.extend(["-b", str(rbdrepository.path), "gc"]) + argv.extend(["-b", str(repository_on_disk.path), "gc"]) utils.log_data = "" with pytest.raises(SystemExit): main() @@ -152,11 +226,13 @@ def do_raise(*args, **kw): Ellipsis( """\ ... D - command/invoked args='... -b ... gc' -... D - quarantine/scan entries=0 +... D - repo/scan-reports entries=0 +... I - chunked-store/to-v2 \n\ +... I - chunked-store/to-v2-finished \n\ ... E - command/failed exception_class='builtins.RuntimeError' exception_msg='test' exception>\tTraceback (most recent call last): exception>\t File ".../src/backy/rbd/__init__.py", line ..., in main -exception>\t b.gc() +exception>\t source.gc() exception>\t File ".../src/backy/rbd/tests/test_main.py", line ..., in do_raise exception>\t raise RuntimeError("test") exception>\tRuntimeError: test diff --git a/src/backy/rbd/tests/test_rbd.py b/src/backy/rbd/tests/test_rbd.py index b4a30c5f..2c9756e4 100644 --- a/src/backy/rbd/tests/test_rbd.py +++ b/src/backy/rbd/tests/test_rbd.py @@ -1,13 +1,11 @@ -import os import subprocess from unittest import mock import pytest -import backy.rbd.sources.ceph from backy.ext_deps import RBD -from backy.rbd.sources.ceph.diff import RBDDiffV1 -from backy.rbd.sources.ceph.rbd import RBDClient +from backy.rbd.rbd import RBDDiffV1 +from backy.rbd.source import RBDClient @mock.patch("subprocess.check_output") diff --git a/src/backy/rbd/tests/test_source.py b/src/backy/rbd/tests/test_source.py index ea638a23..94a64061 100644 --- a/src/backy/rbd/tests/test_source.py +++ b/src/backy/rbd/tests/test_source.py @@ -1,23 +1,308 @@ -from backy.rbd import RbdSource -from backy.rbd.sources.ceph.source import CephRBD +import json +import os +import subprocess +from pathlib import Path +from typing import IO +from unittest import mock +import pytest -def test_configure_ceph_source(tmp_path, log): - with open(str(tmp_path / "config"), "w") as f: - f.write( +from backy.ext_deps import BACKY_RBD_CMD, BASH +from backy.rbd import RBDSource, RestoreArgs +from backy.rbd.source import CephRBD +from backy.rbd.tests.conftest import create_rev +from backy.tests import Ellipsis +from backy.utils import CHUNK_SIZE + + +class FakeCephRBD: + data = "" + + def __init__(self, data): + self.data = data + + def ready(self): + return bool(self.data) + + def __call__(self, *args, **kwargs): + return self + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def get_parent(self): + return None + + def full(self, file): + assert self.data + file.write(self.data) + + def verify(self, target: IO): + assert self.data == target.read() + + +@pytest.fixture +def rbdsource(repository, log): + return RBDSource(repository, FakeCephRBD(b""), log) + + +def test_configure_rbd_source_no_consul(repository, tmp_path, log): + with open(tmp_path / "source.config", "w", encoding="utf-8") as f: + json.dump( + { + "type": "rbd", + "pool": "test", + "image": "test04.root", + }, + f, + ) + source = 
repository.get_source() + assert isinstance(source, RBDSource) + ceph_rbd = source.ceph_rbd + assert isinstance(ceph_rbd, CephRBD) + assert ceph_rbd.pool == "test" + assert ceph_rbd.image == "test04.root" + assert ceph_rbd.always_full is False + assert ceph_rbd.vm is None + assert ceph_rbd.consul_acl_token is None + + +def test_configure_rbd_source_consul(repository, tmp_path, log): + with open(tmp_path / "source.config", "w", encoding="utf-8") as f: + json.dump( + { + "type": "rbd", + "pool": "test", + "image": "test04.root", + "full-always": True, + "vm": "test04", + "consul_acl_token": "token", + }, + f, + ) + source = repository.get_source() + assert isinstance(source, RBDSource) + ceph_rbd = source.ceph_rbd + assert isinstance(ceph_rbd, CephRBD) + assert ceph_rbd.pool == "test" + assert ceph_rbd.image == "test04.root" + assert ceph_rbd.always_full is True + assert ceph_rbd.vm == "test04" + assert ceph_rbd.consul_acl_token == "token" + + +def test_restore_target(rbdsource, repository, tmp_path, log): + data = b"volume contents\n" + rbdsource.ceph_rbd.data = data + target = tmp_path / "restore.img" + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + rbdsource.restore(r, RestoreArgs(str(target))) + with open(target, "rb") as t: + assert data == t.read() + + +def test_restore_stdout(rbdsource, repository, capfd, log): + data = b"volume contents\n" + rbdsource.ceph_rbd.data = data + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + rbdsource.restore(r, RestoreArgs("-")) + assert not Path("-").exists() + out, err = capfd.readouterr() + assert data.decode("utf-8") == out + + +def test_restore_backy_extract(rbdsource, repository, monkeypatch, log): + check_output = mock.Mock(return_value="backy-extract 1.1.0") + monkeypatch.setattr(subprocess, "check_output", check_output) + rbdsource.restore_backy_extract = mock.Mock() + data = b"a" * CHUNK_SIZE + rbdsource.ceph_rbd.data = data + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + rbdsource.restore(r, RestoreArgs("restore.img")) + check_output.assert_called() + rbdsource.restore_backy_extract.assert_called_once_with(r, "restore.img") + + +def test_backup_corrupted(rbdsource, repository, log): + data = b"volume contents\n" + rbdsource.ceph_rbd.data = data + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + + chunk_path = rbdsource.store.chunk_path(next(iter(rbdsource.store.seen))) + chunk_path.chmod(0o664) + with open(chunk_path, "wb") as f: + f.write(b"invalid") + r2 = create_rev(repository, {"daily"}) + rbdsource.backup(r2) + + assert repository.history == [] + assert not chunk_path.exists() + + +def test_gc(rbdsource, repository, log): + r = create_rev(repository, set()) + # Write 1 version to the file + with rbdsource.open(r) as f: + f.write(b"asdf") + remote = create_rev(repository, set()) # remote revision without local data + remote.server = "remote" + remote.materialize() + + # Reassign as the scan will create a new reference + r = repository.find_by_uuid(r.uuid) + assert len(list(rbdsource.store.ls())) == 1 + rbdsource.gc() + assert len(list(rbdsource.store.ls())) == 1 + r.remove() + rbdsource.gc() + assert len(list(rbdsource.store.ls())) == 0 + + +def test_smoketest_internal(rbdsource, repository, tmp_path, log): + # These copies of data are intended to be different versions of the same + # file. 
+ data1 = b"1" * 2 * 1024**2 + data2 = b"2" * 2 * 1024**2 + data3 = b"3" * 2 * 1024**2 + + # Backup first state + rbdsource.ceph_rbd.data = data1 + rev1 = create_rev(repository, {"manual:test"}) + rbdsource.backup(rev1) + + # Restore first state from the newest revision + restore_args = RestoreArgs(str(tmp_path / "image1.restore")) + rbdsource.restore(rev1, restore_args) + with pytest.raises(IOError): + open(repository.history[-1].info_filename, "wb") + assert data1 == open(restore_args.target, "rb").read() + + # Backup second state + rbdsource.ceph_rbd.data = data2 + rev2 = create_rev(repository, {"test"}) + rbdsource.backup(rev2) + assert len(repository.history) == 2 + + # Restore second state from second backup which is the newest at position 0 + rbdsource.restore(rev2, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Our original backup is now at position 1. Let's restore that again. + rbdsource.restore(rev1, restore_args) + assert data1 == open(restore_args.target, "rb").read() + + # Backup second state again + rbdsource.ceph_rbd.data = data2 + rev3 = create_rev(repository, {"manual:test"}) + rbdsource.backup(rev3) + assert len(repository.history) == 3 + + # Restore image2 from its most recent backup at position 0 + rbdsource.restore(rev3, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Restore image2 from its previous backup, now at position 1 + rbdsource.restore(rev2, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Our original backup is now at position 2. Let's restore that again. + rbdsource.restore(rev1, restore_args) + assert data1 == open(restore_args.target, "rb").read() + + # Backup third state + rbdsource.ceph_rbd.data = data3 + rev4 = create_rev(repository, {"test"}) + rbdsource.backup(rev4) + assert len(repository.history) == 4 + + # Restore image3 from the most current state + rbdsource.restore(rev4, restore_args) + assert data3 == open(restore_args.target, "rb").read() + + # Restore image2 from position 1 and 2 + rbdsource.restore(rev3, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + rbdsource.restore(rev2, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Restore image1 from position 3 + rbdsource.restore(rev1, restore_args) + assert data1 == open(restore_args.target, "rb").read() + + +@pytest.mark.slow +@pytest.mark.skip +def test_smoketest_external(): + output = subprocess.check_output( + [BASH, Path(__file__).parent / "smoketest.sh"], + env=os.environ | {"BACKY_RBD_CMD": BACKY_RBD_CMD}, + ) + output = output.decode("utf-8") + assert ( + Ellipsis( """\ ---- schedule: - daily: - interval: 1d - keep: 7 - source: - type: ceph-rbd - pool: test - image: test04 +Using /... as workspace. +Generating Test Data.. Done. +Backing up img_state1.img. Done. +Backing up img_state1.img with unknown tag. Done. +Restoring img_state1.img from level 0. Done. +Diffing restore_state1.img against img_state1.img. Success. +Backing up img_state2.img. Done. +Restoring img_state2.img from level 0. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state1.img from level 1. Done. +Diffing restore_state1.img against img_state1.img. Success. +Backing up img_state2.img again. Done. +Restoring img_state2.img from level 0. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state2.img from level 1. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state1.img from level 2.
+Diffing restore_state1.img against img_state1.img. Success. +Backing up img_state3.img. Done. +Restoring img_state3.img from level 0. Done. +Diffing restore_state3.img against img_state3.img. Success. +Restoring img_state2.img from level 1. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state2.img from level 2. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state1.img from level 3. Done. +Diffing restore_state1.img against img_state1.img. Success. +┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ +┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ +┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ +┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ daily │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ +4 revisions containing 2.0 MiB data (estimated) +┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ +┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ +┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ +┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ +3 revisions containing 1.5 MiB data (estimated) """ ) - backup = RbdSource(tmp_path, log) - assert isinstance(backup.source, CephRBD) - assert backup.source.pool == "test" - assert backup.source.image == "test04" + == output + ) diff --git a/src/backy/report.py b/src/backy/report.py new file mode 100644 index 00000000..f3d9d669 --- /dev/null +++ b/src/backy/report.py @@ -0,0 +1,95 @@ +import dataclasses +import datetime +import hashlib +import traceback +from dataclasses import dataclass, field +from pathlib import Path + +import shortuuid +import yaml +from structlog.stdlib import BoundLogger +from yaml import SafeDumper + +import backy +from backy.utils import SafeFile + + +@dataclass(frozen=True) +class ProblemReport: + uuid: str = field(init=False, default_factory=shortuuid.uuid) + timestamp: datetime.datetime = field( + init=False, default_factory=backy.utils.now + ) + + def to_dict(self) -> dict: + return dataclasses.asdict(self) + + def store(self, dir: Path, log: BoundLogger) -> None: + log.debug("store-report", uuid=self.uuid) + path = dir / f"{self.uuid}.report" + if path.exists(): + log.debug("store-report-exists", uuid=self.uuid) + return + + class CustomDumper(SafeDumper): + pass + + def representer(dumper, data): + return dumper.represent_scalar( + "tag:yaml.org,2002:str", + data, + style="|" if len(data) > 100 else None, + ) + + yaml.add_representer(str, representer, Dumper=CustomDumper) + + with SafeFile(path, encoding="utf-8") as f: + f.open_new("wb") + yaml.dump(self.to_dict(), f, sort_keys=False, Dumper=CustomDumper) + + +@dataclass(frozen=True) +class ChunkMismatchReport(ProblemReport): + source_chunk: bytes + source_hash: str = field(init=False) + target_chunk: bytes + target_hash: str = field(init=False) + 
offset: int + traceback: str = field(init=False) + + def __post_init__(self) -> None: + object.__setattr__( + self, "source_hash", hashlib.md5(self.source_chunk).hexdigest() + ) + object.__setattr__( + self, "target_hash", hashlib.md5(self.target_chunk).hexdigest() + ) + object.__setattr__( + self, "traceback", "".join(traceback.format_stack()).strip() + ) + + def to_dict(self) -> dict: + dict = dataclasses.asdict(self) + del dict["source_chunk"] + del dict["target_chunk"] + return dict + + def store(self, dir: Path, log: BoundLogger) -> None: + chunks_path = dir / "chunks" + chunks_path.mkdir(exist_ok=True) + self._store_chunk(chunks_path, self.source_chunk, self.source_hash, log) + self._store_chunk(chunks_path, self.target_chunk, self.target_hash, log) + super().store(dir, log) + + @staticmethod + def _store_chunk( + dir: Path, chunk: bytes, hash: str, log: BoundLogger + ) -> None: + log.debug("store-chunk", hash=hash) + path = dir / hash + if path.exists(): + log.debug("store-chunk-exists", hash=hash) + return + with SafeFile(path) as f: + f.open_new("wb") + f.write(chunk) diff --git a/src/backy/repository.py b/src/backy/repository.py index ee2ae773..97456222 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -1,7 +1,7 @@ +import contextlib import datetime import fcntl import re -from enum import Enum from math import ceil, floor from pathlib import Path from typing import IO, Any, List, Literal, Optional, TypedDict @@ -10,6 +10,7 @@ import yaml from structlog.stdlib import BoundLogger +import backy import backy.source from backy.utils import ( duplicates, @@ -20,25 +21,10 @@ unique, ) +from .report import ProblemReport from .revision import Revision, Trust, filter_schedule_tags from .schedule import Schedule - -# Locking strategy: -# -# - You can only run one backup of a machine at a time, as the backup will -# interact with this machines' list of snapshots and will get confused -# if run in parallel. -# - You can restore while a backup is running. -# - You can only purge while nothing else is happening. -# - Trying to get a shared lock (specifically purge) will block and wait -# whereas trying to get an exclusive lock (running backups, purging) will -# immediately give up. -# - Locking is not re-entrant. It's forbidden and protected to call another -# locking main function. 
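
A note on the frozen-dataclass pattern used by ChunkMismatchReport above: source_hash, target_hash and traceback are derived in __post_init__, and because the dataclass is frozen, plain attribute assignment would raise FrozenInstanceError, so the patch goes through object.__setattr__. A minimal, self-contained sketch of that pattern (HashedBlob and its field names are illustrative, not part of the patch):

    import hashlib
    from dataclasses import dataclass, field


    @dataclass(frozen=True)
    class HashedBlob:
        payload: bytes
        digest: str = field(init=False)  # derived, not passed by callers

        def __post_init__(self) -> None:
            # frozen=True blocks `self.digest = ...`, so bypass the frozen
            # guard the same way the report classes do.
            object.__setattr__(
                self, "digest", hashlib.md5(self.payload).hexdigest()
            )


    assert HashedBlob(b"chunk").digest == hashlib.md5(b"chunk").hexdigest()
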
- - -class RepositoryNotEmpty(RuntimeError): - pass +from .source import Source class StatusDict(TypedDict): @@ -73,100 +59,171 @@ class Repository(object): """ path: Path + report_path: Path + sourcetype: type[backy.source.Source] schedule: Schedule - history: list[Revision] + history: List[Revision] + report_ids: List[str] log: BoundLogger _by_uuid: dict[str, Revision] _lock_fds: dict[str, IO] - sourcetype: type[backy.source.Source] def __init__( self, path: Path, - source: backy.source.Source, + sourcetype: type[backy.source.Source], schedule: Schedule, log: BoundLogger, ): - self.schedule = schedule - self.source = source - self.log = log.bind(subsystem="backup") self.path = path.resolve() + self.report_path = self.path / "quarantine" + self.schedule = schedule + self.sourcetype = sourcetype + self.log = log.bind(subsystem="repo") self._lock_fds = {} def connect(self): self.path.mkdir(parents=True, exist_ok=True) self.scan() - self.source.bind(self) + self.scan_reports() + + def get_source(self): + return self.sourcetype.from_repo(self) @staticmethod def from_config(config: dict[str, Any], log: BoundLogger) -> "Repository": schedule = Schedule() schedule.configure(config["schedule"]) + try: + sourcetype = backy.source.factory_by_type(config["type"]) + except KeyError: + log.error( + "unknown-source-type", + _fmt_msg="Unknown source type '{type}'. You will be limited to metadata only operations...", + type=config["type"], + ) + sourcetype = Source[None] - source = backy.source.factory_by_type( - config["source"]["type"] - ).from_config(config["source"], log) - - return Repository(config["path"], source, schedule, log) + return Repository(Path(config["path"]), sourcetype, schedule, log) - @property - def problem_reports(self) -> list[str]: - return [] + @classmethod + def load(cls, path: Path, log: BoundLogger) -> "Repository": + try: + with path.joinpath("config").open(encoding="utf-8") as f: + config = yaml.safe_load(f) + return cls.from_config(config, log) + except IOError: + log.error( + "could-not-read-config", + _fmt_msg="Could not read config file. Is the path correct?", + config_path=str(path / "config"), + ) + raise + + def store(self) -> None: + with self.path.joinpath("config").open(encoding="utf-8") as f: + yaml.safe_dump(self.to_dict(), f) + + def to_dict(self) -> dict[str, Any]: + return { + "schedule": self.schedule.to_dict(), + "type": self.sourcetype.type_, + "path": str(self.path), + } + + def add_report(self, report: ProblemReport) -> None: + self.log.info("add-report", uuid=report.uuid) + report.store(self.report_path, self.log) + self.report_ids.append(report.uuid) + + def scan_reports(self) -> None: + self.report_ids = [g.name for g in self.report_path.glob("*.report")] + self.log.debug("scan-reports", entries=len(self.report_ids)) + + # Locking strategy: + # + # - You can only run one backup of a machine at a time, as the backup will + # interact with this machines' list of snapshots and will get confused + # if run in parallel. + # - You can restore while a backup is running. + # - You can only purge while nothing else is happening. + # - Trying to get a shared lock (specifically purge) will block and wait + # whereas trying to get an exclusive lock (running backups, purging) will + # immediately give up. + # - Locking is not re-entrant. It's forbidden and protected to call another + # locking main function. - # I placed this on the class because this is usually used in conjunction - # with the class and improves cohesiveness and readability IMHO. 
@staticmethod - def locked(target=None, mode=None): - if mode == "shared": - mode = fcntl.LOCK_SH - elif mode == "exclusive": - mode = fcntl.LOCK_EX | fcntl.LOCK_NB - else: - raise ValueError("Unknown lock mode '{}'".format(mode)) - + def locked( + target: str, + mode: Literal["shared", "exclusive"], + repo_attr: Optional[str] = None, + ): def wrap(f): def locked_function(self, *args, skip_lock=False, **kw): if skip_lock: return f(self, *args, **kw) - if target in self._lock_fds: - raise RuntimeError("Bug: Locking is not re-entrant.") - target_path = self.path / target - if not target_path.exists(): - target_path.touch() - self._lock_fds[target] = target_path.open() - try: - fcntl.flock(self._lock_fds[target], mode) - except BlockingIOError: - self.log.warning( - "lock-no-exclusive", - _fmt_msg="Failed to get exclusive lock for '{function}'.", - function=f.__name__, - ) - raise + if repo_attr: + repo = getattr(self, repo_attr) else: - try: - return f(self, *args, **kw) - finally: - fcntl.flock(self._lock_fds[target], fcntl.LOCK_UN) - finally: - self._lock_fds[target].close() - del self._lock_fds[target] + repo = self + with repo.lock(target, mode, f.__name__): + return f(self, *args, **kw) locked_function.__name__ = "locked({}, {})".format( - f.__name__, target + f.__qualname__, target ) return locked_function return wrap + @contextlib.contextmanager + def lock( + self, + target: str, + mode: Literal["shared", "exclusive"], + logname="", + ): + if mode == "shared": + mode_ = fcntl.LOCK_SH + elif mode == "exclusive": + mode_ = fcntl.LOCK_EX | fcntl.LOCK_NB + else: + raise ValueError("Unknown lock mode '{}'".format(mode)) + + if ( + target in self._lock_fds + ): # FIXME: should this be a class var? dict(path->lock) + raise RuntimeError("Bug: Locking is not re-entrant.") + target_path = self.path / target + if not target_path.exists(): + target_path.touch() + self._lock_fds[target] = target_path.open() + try: + fcntl.flock(self._lock_fds[target], mode_) + except BlockingIOError: + self.log.warning( + "lock-failed", + _fmt_msg="Failed to get '{mode}' lock on '{target}' for '{function}'.", + mode=mode, + target=target, + function=logname, + ) + raise + else: + try: + yield + finally: + fcntl.flock(self._lock_fds[target], fcntl.LOCK_UN) + finally: + self._lock_fds[target].close() + del self._lock_fds[target] + @property def name(self) -> str: return self.path.name - def to_dict(self): - return self.config - def scan(self) -> None: self.history = [] self._by_uuid = {} @@ -249,10 +306,6 @@ def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): ) raise RuntimeError("Remote revs disallowed") - @locked(target=".backup", mode="exclusive") - def run_with_backup_lock(self, fun, *args, **kw): - return fun(*args, **kw) - ################# # Making backups diff --git a/src/backy/source.py b/src/backy/source.py index 0712d6de..2b5066ac 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -2,6 +2,7 @@ from importlib.metadata import entry_points from typing import TYPE_CHECKING, Any, Generic, TypeVar +import yaml from structlog.stdlib import BoundLogger if TYPE_CHECKING: @@ -51,17 +52,37 @@ class Source(Generic[RestoreArgsType]): """ - type_: str + type_: str = "" subcommand: str - repository: "Repository" - - def bind(self, repository: "Repository") -> None: - self.repository = repository + @classmethod + def from_repo(cls, repository: "Repository"): + assert ( + repository.sourcetype == cls + ), f"this repo requires a {repository.sourcetype.type_} source and not a {cls.type_} source" 
+ path = repository.path.joinpath(f"source.config") + try: + with path.open(encoding="utf-8") as f: + config = yaml.safe_load(f) + except IOError: + repository.log.error( + "could-not-read-source-config", + _fmt_msg="Could not read source config file. Is the path correct?", + config_path=str(path), + ) + raise + + return cls.from_config(repository, config, repository.log) @classmethod @abstractmethod - def from_config(cls, config: dict[str, Any], log: BoundLogger) -> "Source": + def from_config( + cls, repository: "Repository", config: dict[str, Any], log: BoundLogger + ) -> "Source": + ... + + @abstractmethod + def to_config(self) -> dict[str, Any]: ... @abstractmethod diff --git a/src/backy/rbd/tests/test_quarantine.py b/src/backy/tests/test_report.py similarity index 77% rename from src/backy/rbd/tests/test_quarantine.py rename to src/backy/tests/test_report.py index c134b1cf..dec87946 100644 --- a/src/backy/rbd/tests/test_quarantine.py +++ b/src/backy/tests/test_report.py @@ -1,17 +1,18 @@ -from backy.rbd.quarantine import QuarantineReport, QuarantineStore +from backy.report import ChunkMismatchReport +from backy.repository import Repository from backy.tests import Ellipsis def test_quarantine(tmp_path, log, clock): - store = QuarantineStore(tmp_path, log) - store.add_report(QuarantineReport(b"source", b"target", 3)) + repo = Repository() + repo.add_report(ChunkMismatchReport(b"source", b"target", 3)) with open( - (tmp_path / "quarantine" / store.report_ids[0]).with_suffix(".report") + (tmp_path / "quarantine" / repo.report_ids[0]).with_suffix(".report") ) as report: assert ( Ellipsis( f"""\ -uuid: {store.report_ids[0]} +uuid: {repo.report_ids[0]} source_hash: 36cd38f49b9afa08222c0dc9ebfe35eb target_hash: 42aefbae01d2dfd981f7da7d823d689e offset: 3 diff --git a/src/backy/tests/test_repository.py b/src/backy/tests/test_repository.py new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/src/backy/tests/test_repository.py @@ -0,0 +1 @@ +# TODO diff --git a/src/backy/utils.py b/src/backy/utils.py index 97452a8b..243aa380 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -22,6 +22,8 @@ import structlog import tzlocal +import backy + from .ext_deps import CP _T = TypeVar("_T") @@ -387,8 +389,7 @@ def files_are_roughly_equal( samplesize=0.01, blocksize=CHUNK_SIZE, timeout=5 * 60, - report: Callable[[bytes, bytes, int], None] = lambda a, b, c: None, -) -> bool: +) -> Optional["backy.report.ChunkMismatchReport"]: a.seek(0, os.SEEK_END) size = a.tell() blocks = size // blocksize @@ -412,22 +413,24 @@ def files_are_roughly_equal( duration = now() - started if duration > max_duration: log.info("files-roughly-equal-stopped", duration=duration) - return True + return None a.seek(block * blocksize) b.seek(block * blocksize) chunk_a = a.read(blocksize) chunk_b = b.read(blocksize) if chunk_a != chunk_b: + report = backy.report.ChunkMismatchReport( + chunk_a, chunk_b, block * blocksize + ) log.error( "files-not-roughly-equal", - hash_a=hashlib.md5(chunk_a).hexdigest(), - hash_b=hashlib.md5(chunk_b).hexdigest(), - pos=block * blocksize, + hash_a=report.source_hash, + hash_b=report.target_hash, + pos=report.offset, ) - report(chunk_a, chunk_b, block * blocksize) - return False - return True + return report + return None def now(): From 60f33c92679a4baff250f45609a85eaea1fd3d35 Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Mon, 5 Aug 2024 16:43:53 +0200 Subject: [PATCH 21/25] snapshot: fix all tests except cli --- src/backy/cli/tests/test_client.py | 3 +- src/backy/conftest.py | 51 
+- src/backy/daemon/__init__.py | 3 +- src/backy/daemon/scheduler.py | 178 ++----- src/backy/daemon/tests/test_daemon.py | 31 +- src/backy/file/__init__.py | 186 ++----- src/backy/file/tests/test_file.py | 20 +- src/backy/rbd/__init__.py | 689 +++++++++++++++++++++----- src/backy/rbd/chunked/file.py | 31 +- src/backy/rbd/chunked/store.py | 3 +- src/backy/rbd/source.py | 544 -------------------- src/backy/rbd/tests/conftest.py | 19 +- src/backy/rbd/tests/test_ceph.py | 17 +- src/backy/rbd/tests/test_main.py | 125 ++--- src/backy/rbd/tests/test_rbd.py | 2 +- src/backy/rbd/tests/test_source.py | 63 ++- src/backy/repository.py | 50 +- src/backy/revision.py | 4 +- src/backy/source.py | 362 ++++++++++++-- src/backy/tests/conftest.py | 12 - src/backy/tests/test_backup.py | 4 +- src/backy/tests/test_report.py | 17 +- src/backy/tests/test_revision.py | 7 - src/backy/tests/test_schedule.py | 8 +- src/backy/tests/test_utils.py | 10 +- 25 files changed, 1197 insertions(+), 1242 deletions(-) delete mode 100644 src/backy/rbd/source.py delete mode 100644 src/backy/tests/conftest.py diff --git a/src/backy/cli/tests/test_client.py b/src/backy/cli/tests/test_client.py index b2db886f..41debfff 100644 --- a/src/backy/cli/tests/test_client.py +++ b/src/backy/cli/tests/test_client.py @@ -6,6 +6,7 @@ from aiohttp.web_exceptions import HTTPUnauthorized from backy import utils +from backy.cli import Command from backy.daemon.api import BackyAPI, Client from backy.report import ChunkMismatchReport from backy.revision import Revision @@ -67,7 +68,7 @@ async def api_client(api, aiohttp_client, log): @pytest.fixture async def cli_client(api_client, log): - return CLIClient(api_client, log) + return Command(api_client, log) async def test_cli_jobs(cli_client, capsys): diff --git a/src/backy/conftest.py b/src/backy/conftest.py index 77129f36..80242b96 100644 --- a/src/backy/conftest.py +++ b/src/backy/conftest.py @@ -10,7 +10,19 @@ import backy.logging import backy.schedule +import backy.source from backy import utils +from backy.file import FileSource +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule + + +def create_rev(repository, tags) -> Revision: + r = Revision.create(repository, tags, repository.log) + r.materialize() + repository.scan() + return repository.find_by_uuid(r.uuid) @pytest.fixture @@ -37,15 +49,6 @@ def pytest_assertrepr_compare(op, left, right): return report.diff -@pytest.fixture(autouse=True) -def log(monkeypatch): - def noop_init_logging(*args, **kwargs): - pass - - monkeypatch.setattr(backy.logging, "init_logging", noop_init_logging) - return structlog.stdlib.get_logger() - - @pytest.fixture(autouse=True) def fix_cwd(): cwd = os.getcwd() @@ -74,11 +77,27 @@ def seed_random(monkeypatch): @pytest.fixture def schedule(): - schedule = backy.schedule.Schedule() + schedule = Schedule() schedule.configure({"daily": {"interval": "1d", "keep": 5}}) return schedule +@pytest.fixture +def repository(tmp_path, schedule, log): + repo = Repository(tmp_path, schedule, log) + repo.connect() + return repo + + +@pytest.fixture(autouse=True) +def log(monkeypatch): + def noop_init_logging(*args, **kwargs): + pass + + monkeypatch.setattr(backy.logging, "init_logging", noop_init_logging) + return structlog.stdlib.get_logger() + + @pytest.fixture(scope="session") def setup_structlog(): utils.log_data = "" @@ -94,3 +113,15 @@ def msg(self, message: str): @pytest.fixture(autouse=True) def reset_structlog(setup_structlog): utils.log_data = "" + + 
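
The no_subcommand fixture added just below routes subprocess invocation back into FileSource.main in-process, so tests never fork. In isolation, the autouse-plus-monkeypatch idiom it relies on looks like this (Client and fetch are invented stand-ins for illustration, not backy APIs):

    import pytest


    class Client:
        def fetch(self) -> str:
            raise RuntimeError("would hit the network")


    @pytest.fixture(autouse=True)
    def offline_client(monkeypatch):
        # autouse=True injects this fixture into every test in scope;
        # monkeypatch restores the original attribute at teardown.
        monkeypatch.setattr(Client, "fetch", lambda self: "canned response")


    def test_fetch_is_stubbed():
        assert Client().fetch() == "canned response"
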
+@pytest.fixture(autouse=True) +def no_subcommand(monkeypatch): + def sync_invoke(self, *args): + return FileSource.main(*args) + + async def async_invoke(self, *args): + return FileSource.main(*args) + + monkeypatch.setattr(backy.source.CmdLineSource, "invoke", sync_invoke) + monkeypatch.setattr(backy.source.AsyncCmdLineSource, "invoke", async_invoke) diff --git a/src/backy/daemon/__init__.py b/src/backy/daemon/__init__.py index f72c5479..225b3235 100644 --- a/src/backy/daemon/__init__.py +++ b/src/backy/daemon/__init__.py @@ -144,6 +144,7 @@ def _apply_config(self): try: self.dead_repositories[b.name] = Repository( self.base_dir / b.name, + Schedule(), self.log.bind(job_name=b.name), ) self.log.info("found-backup", job_name=b.name) @@ -370,7 +371,7 @@ def status( else None ), manual_tags=", ".join(manual_tags), - problem_reports=job.repository.problem_reports, + problem_reports=len(job.repository.report_ids), unsynced_revs=unsynced_revs, local_revs=len( job.repository.get_history(clean=True, local=True) diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index fd717415..2e15aee8 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -1,8 +1,6 @@ import asyncio import datetime -import filecmp import hashlib -import os import random import subprocess from collections import defaultdict @@ -16,17 +14,12 @@ from structlog.stdlib import BoundLogger import backy.utils -from backy.ext_deps import BACKY_CLI_CMD, BACKY_RBD_CMD from backy.repository import Repository from backy.revision import Revision from backy.schedule import Schedule -from backy.utils import ( - SafeFile, - format_datetime_local, - generate_taskid, - time_or_event, -) +from backy.utils import format_datetime_local, generate_taskid, time_or_event +from ..source import AsyncCmdLineSource from .api import Client, ClientManager if TYPE_CHECKING: @@ -34,15 +27,17 @@ from backy.repository import StatusDict +def locked(target: str, mode: Literal["shared", "exclusive"]): + return Repository.locked(target, mode, repo_attr="repository") + + class Job(object): name: str - source: dict - schedule_name: str + source: AsyncCmdLineSource status: str = "" next_time: Optional[datetime.datetime] = None next_tags: Optional[set[str]] = None path: Path - repository: Repository logfile: Path last_config: Optional[dict] = None daemon: "BackyDaemon" @@ -63,10 +58,12 @@ def __init__(self, daemon: "BackyDaemon", name: str, log: BoundLogger): self.logfile = self.path / "backy.log" def configure(self, config: dict) -> None: - self.source = config["source"] - self.schedule_name = config["schedule"] - self.update_config() - self.repository = Repository(self.path, self.log) + repository = Repository( + self.path, self.daemon.schedules[config["schedule"]], self.log + ) + repository.connect() + self.source = AsyncCmdLineSource(repository, config["source"], self.log) + self.source.store() self.last_config = config @property @@ -103,29 +100,16 @@ def sla_overdue(self) -> int: @property def schedule(self) -> Schedule: - return self.daemon.schedules[self.schedule_name] + return self.repository.schedule + + @property + def repository(self) -> Repository: + return self.source.repository def update_status(self, status: str) -> None: self.status = status self.log.debug("updating-status", status=self.status) - def update_config(self) -> None: - """Writes config file for 'backy backup' subprocess.""" - - # We do not want to create leading directories, only - # the backup directory itself. 
If the base directory - # does not exist then we likely don't have a correctly - # configured environment. - self.path.mkdir(exist_ok=True) - config = self.path / "config" - with SafeFile(config, encoding="utf-8") as f: - f.open_new("wb") - yaml.safe_dump( - {"source": self.source, "schedule": self.schedule.config}, f - ) - if config.exists() and filecmp.cmp(config, f.name): - raise ValueError("not changed") - def to_dict(self) -> dict: return { "name": self.name, @@ -234,9 +218,16 @@ async def run_forever(self) -> None: self.log.debug("loop-started") while True: self.taskid = generate_taskid() + # TODO: use contextvars self.log = self.log.bind(job_name=self.name, sub_taskid=self.taskid) - self.repository = Repository(self.path, self.log) + self.source.log = self.source.log.bind( + job_name=self.name, sub_taskid=self.taskid + ) + self.repository.log = self.repository.log.bind( + job_name=self.name, sub_taskid=self.taskid + ) + self.repository.connect() next_time, next_tags = self.schedule.next( backy.utils.now(), self.spread, self.repository @@ -271,9 +262,7 @@ async def run_forever(self) -> None: if not run_immediately and await self._wait_for_leader( next_time ): - await self.repository.run_with_backup_lock( - self.pull_metadata, self.daemon.peers, self.taskid - ) + await self.pull_metadata() await self.run_callback() else: speed = "slow" @@ -291,6 +280,7 @@ async def run_forever(self) -> None: self.repository._clean() await self.run_backup(next_tags) self.repository.scan() + self.repository._clean() await self.pull_metadata() await self.run_expiry() await self.push_metadata() @@ -319,40 +309,12 @@ async def run_forever(self) -> None: async def run_backup(self, tags: Set[str]) -> None: self.log.info("backup-started", tags=", ".join(tags)) + r = Revision.create(self.repository, tags, self.log) r.materialize() - proc = await asyncio.create_subprocess_exec( - BACKY_RBD_CMD, - "-t", - self.taskid, - "-b", - str(self.path), - "backup", - r.uuid, - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "backup-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - if return_code: - raise RuntimeError( - f"Backup failed with return code {return_code}" - ) - except asyncio.CancelledError: - self.log.warning("backup-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise + return_code = await self.source.backup(r) + if return_code: + raise RuntimeError(f"Backup failed with return code {return_code}") async def run_expiry(self) -> None: self.log.info("expiry-started") @@ -361,33 +323,7 @@ async def run_expiry(self) -> None: async def run_gc(self) -> None: self.log.info("gc-started") - proc = await asyncio.create_subprocess_exec( - BACKY_RBD_CMD, - "-t", - self.taskid, - "-b", - str(self.path), - "gc", - # start_new_session=True, # Avoid signal propagation like Ctrl-C. 
- # close_fds=True, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "gc-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - except asyncio.CancelledError: - self.log.warning("gc-cancelled", subprocess_pid=proc.pid) - try: - proc.terminate() - except ProcessLookupError: - pass - raise + await self.source.gc() async def run_callback(self) -> None: if not self.daemon.backup_completed_callback: @@ -395,51 +331,31 @@ async def run_callback(self) -> None: return self.log.info("callback-started") - read, write = os.pipe() - # TODO - backy_proc = await asyncio.create_subprocess_exec( - BACKY_CLI_CMD, - "-b", - str(self.path), - "-l", - str(self.logfile), - "status", - "--yaml", - stdin=subprocess.DEVNULL, - stdout=write, - stderr=subprocess.DEVNULL, - ) - os.close(write) + status = yaml.safe_dump( + [r.to_dict() for r in self.repository.history] + ).encode("utf-8") + callback_proc = await asyncio.create_subprocess_exec( str(self.daemon.backup_completed_callback), self.name, - stdin=read, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - os.close(read) try: - stdout, stderr = await callback_proc.communicate() - return_code1 = await backy_proc.wait() + stdout, stderr = await callback_proc.communicate(status) self.log.info( "callback-finished", - return_code1=return_code1, - return_code2=callback_proc.returncode, - subprocess_pid1=backy_proc.pid, - subprocess_pid2=callback_proc.pid, + return_code=callback_proc.returncode, + subprocess_pid=callback_proc.pid, stdout=stdout.decode() if stdout else None, stderr=stderr.decode() if stderr else None, ) except asyncio.CancelledError: self.log.warning( "callback-cancelled", - subprocess_pid1=backy_proc.pid, - subprocess_pid2=callback_proc.pid, + subprocess_pid=callback_proc.pid, ) - try: - backy_proc.terminate() - except ProcessLookupError: - pass try: callback_proc.terminate() except ProcessLookupError: @@ -462,11 +378,8 @@ def stop(self) -> None: self._task = None self.update_status("") + @locked(target=".backup", mode="exclusive") async def push_metadata(self) -> int: - with self.repository.locked(target=".backup", mode="exclusive"): - return await self._push_metadata() - - async def _push_metadata(self) -> int: grouped = defaultdict(list) for r in self.repository.clean_history: if r.pending_changes: @@ -535,11 +448,8 @@ async def _push_metadata_single( error = True return error + @locked(target=".backup", mode="exclusive") async def pull_metadata(self) -> int: - with self.repository.locked(target=".backup", mode="exclusive"): - return await self._pull_metadata() - - async def _pull_metadata(self) -> int: async def remove_dead_peer(): for r in list(self.repository.history): if r.server and r.server not in self.daemon.peers: diff --git a/src/backy/daemon/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py index 0b6ded26..257060b7 100644 --- a/src/backy/daemon/tests/test_daemon.py +++ b/src/backy/daemon/tests/test_daemon.py @@ -13,6 +13,7 @@ from backy import utils from backy.daemon import BackyDaemon from backy.daemon.scheduler import Job +from backy.file import FileSource from backy.revision import Revision from backy.tests import Ellipsis @@ -145,7 +146,7 @@ async def send_sighup(): assert signal_task not in all_tasks -async def test_run_backup(daemon, rbdrepository, log): +async def test_run_backup(daemon, log): job = daemon.jobs["test01"] await job.run_backup({"manual:asdf"}) @@ -153,7 +154,8 @@ 
async def test_run_backup(daemon, rbdrepository, log): assert len(job.repository.history) == 1 revision = job.repository.history[0] assert revision.tags == {"manual:asdf"} - with rbdrepository.open(revision) as f: + source = job.source.create_source(FileSource) + with source._path_for_revision(revision).open("rb") as f: assert f.read() == b"I am your father, Luke!" # Run again. This also covers the code path that works if @@ -163,7 +165,7 @@ async def test_run_backup(daemon, rbdrepository, log): assert len(job.repository.history) == 2 revision = job.repository.history[1] assert revision.tags == {"manual:asdf"} - with rbdrepository.open(revision) as f: + with source._path_for_revision(revision).open("rb") as f: assert f.read() == b"I am your father, Luke!" @@ -171,6 +173,7 @@ async def test_run_callback(daemon, log): job = daemon.jobs["test01"] await job.run_backup({"manual:asdf"}) + job.repository.scan() await job.run_callback() with open("test01.callback_stdin", "r") as f: @@ -364,40 +367,40 @@ async def wait_for_job_finished(): Ellipsis( """\ ... D test01[...] job/loop-started \n\ -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-02 07:32:51' ... E test01[...] job/exception exception_class='builtins.Exception' exception_msg='' exception>\tTraceback (most recent call last): -exception>\t File "/.../src/backy/scheduler.py", line ..., in run_forever +exception>\t File "/.../src/backy/daemon/scheduler.py", line ..., in run_forever exception>\t await self.run_backup(next_tags) -exception>\t File "/.../src/backy/tests/test_daemon.py", line ..., in failing_coroutine +exception>\t File "/.../src/backy/daemon/tests/test_daemon.py", line ..., in failing_coroutine exception>\t raise Exception() exception>\tException ... W test01[...] job/backoff backoff=120 -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-01 09:08:47' ... E test01[...] job/exception exception_class='builtins.Exception' exception_msg='' exception>\tTraceback (most recent call last): -exception>\t File "/.../src/backy/scheduler.py", line ..., in run_forever +exception>\t File "/.../src/backy/daemon/scheduler.py", line ..., in run_forever exception>\t await self.run_backup(next_tags) -exception>\t File "/.../src/backy/tests/test_daemon.py", line ..., in failing_coroutine +exception>\t File "/.../src/backy/daemon/tests/test_daemon.py", line ..., in failing_coroutine exception>\t raise Exception() exception>\tException ... W test01[...] job/backoff backoff=240 -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-01 09:10:47' ... E test01[...] job/exception exception_class='builtins.Exception' exception_msg='' exception>\tTraceback (most recent call last): -exception>\t File "/.../src/backy/scheduler.py", line ..., in run_forever +exception>\t File "/.../src/backy/daemon/scheduler.py", line ..., in run_forever exception>\t await self.run_backup(next_tags) -exception>\t File "/.../src/backy/tests/test_daemon.py", line ..., in failing_coroutine +exception>\t File "/.../src/backy/daemon/tests/test_daemon.py", line ..., in failing_coroutine exception>\t raise Exception() exception>\tException ... W test01[...] job/backoff backoff=480 -... D test01[...] quarantine/scan entries=0 +... D test01[...] 
repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-01 09:14:47' ... I test01[...] job/stop \n\ -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-02 07:32:51' """ ) diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index d6dd37d4..afeff307 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -1,41 +1,59 @@ -import argparse -import errno import shutil import sys +import time +from argparse import ArgumentParser, Namespace +from dataclasses import dataclass from pathlib import Path -from typing import Any +from typing import Any, Iterable -import structlog from structlog.stdlib import BoundLogger from backy.revision import Revision -from backy.source import Source -from backy.utils import generate_taskid +from backy.source import RestoreArgs, Source -from .. import logging from ..repository import Repository -class FileSource(Source): +@dataclass(frozen=True) +class FileRestoreArgs(RestoreArgs): + target: Path + + def to_cmdargs(self) -> Iterable[str]: + return [str(self.target)] + + @classmethod + def from_args(cls, args: Namespace) -> "FileRestoreArgs": + return cls(args.target) + + @classmethod + def setup_argparse(cls, restore_parser: ArgumentParser) -> None: + restore_parser.add_argument( + "target", + type=Path, + metavar="TARGET", + help="Copy backed up revision to TARGET", + ) + + +class FileSource(Source[FileRestoreArgs]): type_ = "file" - subcommand = "backy-file" + restore_type = FileRestoreArgs - repository: Repository - path: Path # the source we are backing up + filename: Path # the source we are backing up - def __init__(self, repository: Repository, path: Path): - self.repository = repository - self.path = path + def __init__(self, repository: Repository, filename: Path): + super().__init__(repository) + self.filename = filename @classmethod def from_config( cls, repository: Repository, config: dict[str, Any], log: BoundLogger ) -> "FileSource": - assert cls.type_ == config["type"] - return cls(repository, Path(config["path"])) + assert config["type"] == "file" + return cls(repository, Path(config["filename"])) - def to_config(self) -> dict[str, Any]: - return {"type": self.type_, "path": str(self.path)} + # def to_config(self) -> dict[str, Any]: + # return {"type": self.type_, "path": str(self.path)} def _path_for_revision(self, revision: Revision) -> Path: return self.repository.path / revision.uuid @@ -43,13 +61,18 @@ def _path_for_revision(self, revision: Revision) -> Path: def backup(self, revision: Revision): backup = self._path_for_revision(revision) assert not backup.exists() - shutil.copy(self.path, backup) + start = time.time() + shutil.copy(self.filename, backup) + revision.stats["duration"] = time.time() - start + revision.write_info() + revision.readonly() + return True - def restore(self, revision: Revision, target: Path): - shutil.copy(self._path_for_revision(revision), target) + def restore(self, revision: Revision, args: FileRestoreArgs): + shutil.copy(self._path_for_revision(revision), args.target) def gc(self): - files = set(self.repository.path.glob("*")) + files = set(self.repository.path.glob("*.rev")) expected_files = set( (self.repository.path / r.uuid) for r in self.repository.get_history() @@ -57,122 +80,9 @@ def gc(self): for file in files - expected_files: file.unlink() - def verify(self): - for revision in self.repository.get_history(): - assert 
self._path_for_revision(revision).exists() + def verify(self, revision: Revision): + assert self._path_for_revision(revision).exists() def main(): - parser = argparse.ArgumentParser( - description="""Backup and restore for individual files. - -This is mostly a dummy implementation to assist testing and development: -it is only able to back up from a single file and store versions of it -in a very simplistic fashion. -""", - ) - parser.add_argument( - "-v", "--verbose", action="store_true", help="verbose output" - ) - parser.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - parser.add_argument( - "-C", - default=".", - type=Path, - help=( - "Run as if backy was started in instead of the current " - "working directory." - ), - ) - parser.add_argument( - "-t", - "--taskid", - default=generate_taskid(), - help="ID to include in log messages (default: 4 random base32 chars)", - ) - - subparsers = parser.add_subparsers() - - # BACKUP - p = subparsers.add_parser( - "backup", - help="Perform a backup", - ) - p.set_defaults(func="backup") - p.add_argument("revision", help="Revision to create.") - - # RESTORE - p = subparsers.add_parser( - "restore", - help="Restore (a given revision) to a given target", - ) - p.add_argument("revision", help="Revision to restore.") - p.add_argument( - "target", - metavar="TARGET", - help='Copy backed up revision to TARGET. Use stdout if TARGET is "-"', - ) - p.set_defaults(func="restore") - - # GC - p = subparsers.add_parser( - "gc", - help="Remove unused data from the repository.", - ) - p.set_defaults(func="gc") - - # VERIFY - p = subparsers.add_parser( - "verify", - help="Verify specified revision", - ) - p.add_argument("revision", help="Revision to work on.") - p.set_defaults(func="verify") - - args = parser.parse_args() - - if not hasattr(args, "func"): - parser.print_usage() - sys.exit(0) - - # Logging - logging.init_logging( - args.verbose, - args.backupdir / "backy.log", - defaults={"taskid": args.taskid}, - ) - log = structlog.stdlib.get_logger(subsystem="command") - log.debug("invoked", args=" ".join(sys.argv)) - - try: - b = FileSource() - Repository.f - # XXX scheduler? 
- b._clean() - ret = 0 - match args.func: - case "backup": - success = b.backup(args.revision) - ret = int(not success) - case "restore": - b.restore(args.revisions, args.target, args.backend) - case "gc": - b.gc() - case "verify": - b.verify(args.revision) - case _: - raise ValueError("invalid function: " + args.fun) - log.debug("return-code", code=ret) - sys.exit(ret) - except Exception as e: - if isinstance(e, IOError) and e.errno in [errno.EDEADLK, errno.EAGAIN]: - log.warning("backup-currently-locked") - else: - log.exception("failed") - sys.exit(1) + sys.exit(FileSource.main(*sys.argv)) diff --git a/src/backy/file/tests/test_file.py b/src/backy/file/tests/test_file.py index f8d2d352..da0f0927 100644 --- a/src/backy/file/tests/test_file.py +++ b/src/backy/file/tests/test_file.py @@ -1,16 +1,15 @@ -import yaml - -from backy.file import FileSource +from backy.file import FileRestoreArgs, FileSource from backy.repository import Repository from backy.revision import Revision from backy.schedule import Schedule +from backy.source import CmdLineSource def test_bootstrap_from_api(tmp_path, log): original = tmp_path / "original.txt" schedule = Schedule() - repository = Repository(tmp_path / "repository", FileSource, schedule, log) + repository = Repository(tmp_path / "repository", schedule, log) repository.connect() source = FileSource(repository, original) @@ -22,22 +21,19 @@ def test_bootstrap_from_config(tmp_path, log): repo_path = tmp_path / "repository" - repo_conf = { + conf = { "path": repo_path, "schedule": {}, - "type": "file", + "source": {"type": "file", "filename": str(original)}, } - source_conf = {"type": "file", "path": str(original)} - repository = Repository.from_config(repo_conf, log) - repository.connect() - source = FileSource.from_config(repository, source_conf, log) + source = CmdLineSource.from_config(conf, log).create_source(FileSource) exercise_fresh_repo(source) def exercise_fresh_repo(source: FileSource): - original = source.path + original = source.filename with open(original, "w") as f: f.write("This is the original file.") @@ -52,6 +48,6 @@ def exercise_fresh_repo(source: FileSource): assert original.read_text() == "This is the wrong file." - source.restore(revision, original) + source.restore(revision, FileRestoreArgs(original)) assert original.read_text() == "This is the original file." diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index 4214f446..365ff511 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -1,133 +1,574 @@ -import argparse -import errno +import json +import os +import subprocess import sys +import time +import uuid +from argparse import ArgumentParser, Namespace +from dataclasses import dataclass +from enum import Enum from pathlib import Path +from typing import IO, Any, Iterable, Literal, Optional, Set, cast -import structlog +import consulate +from structlog.stdlib import BoundLogger -from backy.utils import generate_taskid +import backy +import backy.utils +from backy.ext_deps import BACKY_EXTRACT +from backy.report import ChunkMismatchReport +from backy.repository import Repository +from backy.revision import Revision, Trust +from backy.source import RestoreArgs, RestoreArgsType, Source +from backy.utils import ( + CHUNK_SIZE, + END, + TimeOut, + TimeOutError, + copy, + posix_fadvise, + report_status, +) -from .. 
import logging -from ..repository import Repository -from .source import RBDSource, RestoreArgs, RestoreBackend +from .chunked import BackendException, Chunk, File, Hash, Store +from .rbd import RBDClient -def main(): - parser = argparse.ArgumentParser( - description="Backup and restore for block devices.", - ) - - parser.add_argument( - "-v", "--verbose", action="store_true", help="verbose output" - ) - parser.add_argument( - "-b", - "--backupdir", - default=".", - type=Path, - help=( - "directory where backups and logs are written to " - "(default: %(default)s)" - ), - ) - parser.add_argument( - "-t", - "--taskid", - default=generate_taskid(), - help="ID to include in log messages (default: 4 random base32 chars)", - ) - - subparsers = parser.add_subparsers() - - # BACKUP - p = subparsers.add_parser( - "backup", - help="Perform a backup", - ) - p.set_defaults(func="backup") - p.add_argument("revision", help="Revision to work on.") - - # RESTORE - p = subparsers.add_parser( - "restore", - help="Restore (a given revision) to a given target", - ) - p.add_argument( - "--backend", - type=RestoreBackend, - choices=list(RestoreBackend), - default=RestoreBackend.AUTO, - dest="restore_backend", - help="(default: %(default)s)", - ) - p.add_argument("revision", help="Revision to work on.") - p.add_argument( - "target", - metavar="TARGET", - help='Copy backed up revision to TARGET. Use stdout if TARGET is "-"', - ) - p.set_defaults(func="restore") - - # GC - p = subparsers.add_parser( - "gc", - help="Purge the backup store from unused data", - ) - p.set_defaults(func="gc") - - # VERIFY - p = subparsers.add_parser( - "verify", - help="Verify specified revision", - ) - p.add_argument("revision", help="Revision to work on.") - p.set_defaults(func="verify") - - args = parser.parse_args() - - if not hasattr(args, "func"): - parser.print_usage() - sys.exit(0) - - # Logging - logging.init_logging( - args.verbose, - args.backupdir / "backy.log", - defaults={"taskid": args.taskid}, - ) - log = structlog.stdlib.get_logger(subsystem="command") - log.debug("invoked", args=" ".join(sys.argv)) - - # TODO add init here? - - try: - repo = Repository.load(args.backupdir, log) - repo.connect() - source = RBDSource.from_repo(repo) - - ret = 0 - match args.func: - case "backup": - success = source.backup(repo.find_by_uuid(args.revision)) - ret = int(not success) - case "restore": - source.restore( - repo.find_by_uuid(args.revision), - RestoreArgs( - target=args.target, backend=args.restore_backend - ), +def locked(target: str, mode: Literal["shared", "exclusive"]): + return Repository.locked(target, mode, repo_attr="repository") + + +class RestoreBackend(Enum): + AUTO = "auto" + PYTHON = "python" + RUST = "rust" + + def __str__(self): + return self.value + + +@dataclass(frozen=True) +class RBDRestoreArgs(RestoreArgs): + target: str + backend: RestoreBackend = RestoreBackend.AUTO + + def to_cmdargs(self) -> Iterable[str]: + return ["--backend", self.backend.value, self.target] + + @classmethod + def from_args(cls, args: Namespace) -> "RBDRestoreArgs": + return cls(args.target, args.restore_backend) + + @classmethod + def setup_argparse(cls, restore_parser: ArgumentParser) -> None: + restore_parser.add_argument( + "--backend", + type=RestoreBackend, + choices=list(RestoreBackend), + default=RestoreBackend.AUTO, + dest="restore_backend", + help="(default: %(default)s)", + ) + restore_parser.add_argument( + "target", + metavar="TARGET", + help='Copy backed up revision to TARGET. 
Use stdout if TARGET is "-"', + ) + + +class RBDSource(Source[RBDRestoreArgs]): + type_ = "rbd" + restore_type = RBDRestoreArgs + + ceph_rbd: "CephRBD" + store: Store + log: BoundLogger + + def __init__( + self, repository: Repository, ceph_rbd: "CephRBD", log: BoundLogger + ): + super().__init__(repository) + self.log = log.bind(subsystem="rbdsource") + self.ceph_rbd = ceph_rbd + self.store = Store(repository.path / "chunks", self.log) + + @classmethod + def from_config( + cls, repository: Repository, config: dict[str, Any], log: BoundLogger + ) -> "RBDSource": + assert config["type"] == "rbd" + return cls(repository, CephRBD.from_config(config, log), log) + + def _path_for_revision(self, revision: Revision) -> Path: + return self.repository.path / revision.uuid + + def open( + self, + revision: Revision, + mode: str = "rb", + parent: Optional[Revision] = None, + ) -> File: + if "w" in mode or "+" in mode: + if parent and not self._path_for_revision(revision).exists(): + with self._path_for_revision(revision).open( + "wb" + ) as new, self._path_for_revision(parent).open("rb") as old: + # This is ok, this is just metadata, not the actual data. + new.write(old.read()) + file = File(self._path_for_revision(revision), self.store, mode) + + if file.writable() and self.repository.contains_distrusted: + # "Force write"-mode if any revision is distrusted. + self.log.warn("forcing-full") + self.store.force_writes = True + + return file + + ################# + # Making backups + + @locked(target=".backup", mode="exclusive") + @locked(target=".purge", mode="shared") + def backup(self, revision: Revision) -> bool: + self.repository.path.joinpath("last").unlink(missing_ok=True) + self.repository.path.joinpath("last.rev").unlink(missing_ok=True) + + start = time.time() + + if not self.ceph_rbd.ready(): + raise RuntimeError( + "Source is not ready (does it exist? can you access it?)" + ) + + try: + with self.ceph_rbd(revision) as source: + parent_rev = source.get_parent() + with self.open(revision, "wb", parent_rev) as file: + if parent_rev: + source.diff(file, parent_rev) + else: + source.full(file) + with self.open(revision) as file: + report = source.verify(file) + if report: + self.repository.add_report(report) + verified = not report + except BackendException: + self.log.exception("ceph-error-distrust-all") + verified = False + self.repository.distrust("local", skip_lock=True) + if not verified: + self.log.error( + "verification-failed", + revision_uuid=revision.uuid, + ) + revision.remove() + else: + self.log.info("verification-ok", revision_uuid=revision.uuid) + revision.stats["duration"] = time.time() - start + revision.write_info() + revision.readonly() + # Switched from a fine-grained syncing mechanism to "everything + # once" when we're done. This is as safe but much faster. + os.sync() + + # If there are distrusted revisions, then perform at least one + # verification after a backup - for good measure and to keep things + # moving along automatically. This could also be moved into the + # scheduler. + self.repository.scan() + # TODO: move this to cli/daemon? 
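
The RBDRestoreArgs methods above (setup_argparse, to_cmdargs, from_args)
form the seam that carries restore parameters across the daemon/subprocess
boundary. A minimal round-trip sketch, assuming only the names defined in
this hunk:

    from argparse import ArgumentParser

    parser = ArgumentParser()
    RBDRestoreArgs.setup_argparse(parser)
    args = RBDRestoreArgs(target="out.img", backend=RestoreBackend.PYTHON)
    # to_cmdargs() yields ["--backend", "python", "out.img"], which
    # argparse turns back into an equivalent frozen dataclass.
    ns = parser.parse_args(list(args.to_cmdargs()))
    assert RBDRestoreArgs.from_args(ns) == args
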
+ for revision in reversed( + self.repository.get_history(clean=True, local=True) + ): + if revision.trust == Trust.DISTRUSTED: + self.log.warning("inconsistent") + self.verify(revision, skip_lock=True) + break + return verified + + @locked(target=".purge", mode="shared") + @report_status + def verify(self, revision: Revision): + log = self.log.bind(revision_uuid=revision.uuid) + log.info("verify-start") + verified_chunks: Set[Hash] = set() + + # Load verified chunks to avoid duplicate work + for verified_revision in self.repository.get_history( + clean=True, local=True + ): + if verified_revision.trust != Trust.VERIFIED: + continue + verified_chunks.update( + self.open(verified_revision)._mapping.values() + ) + + log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks)) + + errors = False + # Go through all chunks and check them. Delete problematic ones. + f = self.open(revision) + hashes = set(f._mapping.values()) - verified_chunks + yield len(hashes) + 2 + for candidate in hashes: + yield + if candidate in verified_chunks: + continue + try: + c = Chunk(self.store, candidate) + c._read_existing() + except Exception: + log.exception("verify-error", chunk=candidate) + errors = True + try: + self.store.chunk_path(candidate).unlink(missing_ok=True) + except Exception: + log.exception("verify-remove-error", chunk=candidate) + # This is an optimisation: we can skip this revision, purge it + # and then keep verifying other chunks. This avoids checking + # things unnecessarily in duplicate. + # And we only mark it as verified if we never saw any problems. + break + + yield + + # TODO: move this to cli/daemon? + if errors: + # Found any issues? Delete this revision as we can't trust it. + revision.remove() + else: + # No problems found - mark as verified. + revision.verify() + revision.write_info() + + yield + + # Purge to ensure that we don't leave unused, potentially untrusted + # stuff around, especially if this was the last revision. + self.gc(skip_lock=True) + + yield END + yield None + + @locked(target=".purge", mode="exclusive") + def gc(self) -> None: + self.log.debug("purge") + used_chunks: Set[Hash] = set() + # TODO: also remove mapping file + # TODO: purge quarantine store + for revision in self.repository.local_history: + used_chunks.update(self.open(revision)._mapping.values()) + self.store.purge(used_chunks) + # TODO: move this to cli/daemon? + self.repository.clear_purge_pending() + + ################# + # Restoring + + # This needs no locking as it's only a wrapper for restore_file and + # restore_stdout and locking isn't re-entrant. 
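
The verify() generator above follows a simple progress protocol: the first
yield announces the total step count, each bare yield marks one completed
step, and END terminates the stream before a final `yield None`. The real
driver is backy.utils.report_status; the consumer below is only a hedged
sketch of that protocol, not its actual implementation:

    from backy.utils import END

    def drive(gen) -> None:
        total = next(gen)  # first yield: number of steps
        done = 0
        for marker in gen:
            if marker is END:
                break
            done += 1
            print(f"step {done}/{total}")
        next(gen, None)  # consume the trailing `yield None`
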
+ def restore(self, revision: Revision, args: RBDRestoreArgs) -> None: + s = self.open(revision) + restore_backend = args.backend + if restore_backend == RestoreBackend.AUTO: + if self.backy_extract_supported(s): + restore_backend = RestoreBackend.RUST + else: + restore_backend = RestoreBackend.PYTHON + self.log.info("restore-backend", backend=restore_backend.value) + if restore_backend == RestoreBackend.PYTHON: + with s as source: + if args.target != "-": + self.restore_file(source, args.target) + else: + self.restore_stdout(source) + elif restore_backend == RestoreBackend.RUST: + self.restore_backy_extract(revision, args.target) + + def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: + log = self.log.bind(subsystem="backy-extract") + if file.size % CHUNK_SIZE != 0: + log.debug("not-chunk-aligned") + return False + try: + version = subprocess.check_output( + [BACKY_EXTRACT, "--version"], + encoding="utf-8", + errors="replace", + ) + if not version.startswith("backy-extract"): + log.debug("unknown-version") + return False + except Exception: + log.debug("unavailable") + return False + return True + + # backy-extract acquires lock + def restore_backy_extract(self, rev: Revision, target: str) -> None: + log = self.log.bind(subsystem="backy-extract") + cmd = [BACKY_EXTRACT, str(self.repository.path / rev.uuid), target] + log.debug("started", cmd=cmd) + proc = subprocess.Popen(cmd) + return_code = proc.wait() + log.info( + "finished", + return_code=return_code, + subprocess_pid=proc.pid, + ) + if return_code: + raise RuntimeError( + f"backy-extract failed with return code {return_code}. " + "Maybe try `--backend python`?" + ) + + @locked(target=".purge", mode="shared") + def restore_file(self, source: IO, target_name: str) -> None: + """Bulk-copy from open revision `source` to target file.""" + self.log.debug("restore-file", source=source.name, target=target_name) + open(target_name, "ab").close() # touch into existence + with open(target_name, "r+b", buffering=CHUNK_SIZE) as target: + try: + posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore + except Exception: + pass + copy(source, target) + + @locked(target=".purge", mode="shared") + def restore_stdout(self, source: IO) -> None: + """Emit restore data to stdout (for pipe processing).""" + self.log.debug("restore-stdout", source=source.name) + try: + posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) # type: ignore + except Exception: + pass + with os.fdopen(os.dup(1), "wb") as target: + while True: + chunk = source.read(CHUNK_SIZE) + if not chunk: + break + target.write(chunk) + + +class CephRBD: + """The Ceph RBD source. + + Manages snapshots corresponding to revisions and provides a verification + that tries to balance reliability and performance. 
+ """ + + pool: str + image: str + always_full: bool + vm: Optional[str] + consul_acl_token: Optional[str] + rbd: RBDClient + revision: Revision + log: BoundLogger + + snapshot_timeout = 90 + + def __init__( + self, + pool: str, + image: str, + log: BoundLogger, + vm: Optional[str] = None, + consul_acl_token: Optional[str] = None, + always_full: bool = False, + ): + self.pool = pool + self.image = image + self.always_full = always_full + self.vm = vm + self.consul_acl_token = consul_acl_token + self.log = log.bind(subsystem="ceph") + self.rbd = RBDClient(self.log) + + @classmethod + def from_config(cls, config: dict, log: BoundLogger) -> "CephRBD": + return cls( + config["pool"], + config["image"], + log, + config.get("vm"), + config.get("consul_acl_token"), + config.get("full-always", False), + ) + + def ready(self) -> bool: + """Check whether the source can be backed up. + + For RBD sources this means the volume exists and is accessible. + + """ + try: + if self.rbd.exists(self._image_name): + return True + except Exception: + self.log.exception("not-ready") + return False + + def __call__(self, revision): + self.revision = revision + return self + + def __enter__(self): + snapname = "backy-{}".format(self.revision.uuid) + self.create_snapshot(snapname) + return self + + def create_snapshot(self, name: str) -> None: + if not self.consul_acl_token or not self.vm: + self.rbd.snap_create(self._image_name + "@" + name) + return + + consul = consulate.Consul(token=self.consul_acl_token) + snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) + self.log.info( + "creating-snapshot", + snapshot_name=name, + snapshot_key=snapshot_key, + ) + + consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} + + time.sleep(3) + try: + timeout = TimeOut( + self.snapshot_timeout, interval=2, raise_on_timeout=True + ) + while timeout.tick(): + for snapshot in self.rbd.snap_ls(self._image_name): + if snapshot["name"] == name: + return + except TimeOutError: + # The VM might have been shut down. Try doing a regular Ceph + # snapshot locally. + self.rbd.snap_create(self._image_name + "@" + name) + except KeyboardInterrupt: + raise + finally: + # In case the snapshot still gets created: the general snapshot + # deletion code in ceph/source will clean up unused backy snapshots + # anyway. However, we need to work a little harder to delete old + # snapshot requests, otherwise we've sometimes seen those not + # getting deleted and then re-created all the time. + for key in list(consul.kv.find("snapshot/")): + try: + s = consul.kv[key] + except KeyError: + continue + try: + s = json.loads(s) + except json.decoder.JSONDecodeError: + # Clean up garbage. + self.log.warning( + "create-snapshot-removing-garbage-request", + snapshot_key=key, + ) + del consul.kv[key] + if s["vm"] != self.vm: + continue + # The knowledge about the `backy-` prefix isn't properly + # encapsulated here. 
+ if s["snapshot"].startswith("backy-"): + self.log.info( + "create-snapshot-removing-request", + vm=s["vm"], + snapshot_name=s["snapshot"], + snapshot_key=key, + ) + del consul.kv[key] + + @property + def _image_name(self) -> str: + return "{}/{}".format(self.pool, self.image) + + def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): + self._delete_old_snapshots() + + def get_parent(self) -> Optional[Revision]: + if self.always_full: + self.log.info("backup-always-full") + return None + revision = self.revision + while True: + parent = revision.get_parent() + if not parent: + self.log.info("backup-no-valid-parent") + return None + if not self.rbd.exists(self._image_name + "@backy-" + parent.uuid): + self.log.info( + "ignoring-rev-without-snapshot", + revision_uuid=parent.uuid, ) - case "gc": - source.gc() - case "verify": - source.verify(repo.find_by_uuid(args.revision)) - case _: - raise ValueError("invalid function: " + args.fun) - log.debug("return-code", code=ret) - sys.exit(ret) - except Exception as e: - if isinstance(e, IOError) and e.errno in [errno.EDEADLK, errno.EAGAIN]: - log.warning("backup-currently-locked") + revision = parent + continue + # Ok, it's trusted and we have a snapshot. Let's do a diff. + return parent + + def diff(self, target: File, parent: Revision) -> None: + self.log.info("diff") + snap_from = "backy-" + parent.uuid + snap_to = "backy-" + self.revision.uuid + s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) + with s as source: + source.integrate(target, snap_from, snap_to) + self.log.info("diff-integration-finished") + + def full(self, target: File) -> None: + self.log.info("full") + s = self.rbd.export( + "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) + ) + with s as source: + while buf := source.read(4 * backy.utils.MiB): + target.write(buf) + + def verify(self, target: File) -> Optional[ChunkMismatchReport]: + s = self.rbd.image_reader( + "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) + ) + self.revision.stats["ceph-verification"] = "partial" + + with s as source: + self.log.info("verify") + return backy.utils.files_are_roughly_equal(source, cast(IO, target)) + + def _delete_old_snapshots(self) -> None: + # Clean up all snapshots except the one for the most recent valid + # revision. + # Previously we used to remove all snapshots but the one for this + # revision - which is wrong: broken new revisions would always cause + # full backups instead of new deltas based on the most recent valid + # one. 
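
The consul-mediated flow in create_snapshot() above reduces to a common
pattern: publish a request, then poll with TimeOut until the snapshot
appears or the deadline fires, falling back to a local snapshot on timeout.
A sketch of that core loop; snap_exists() and create_local_snapshot() are
hypothetical stand-ins for the rbd calls:

    from backy.utils import TimeOut, TimeOutError

    timeout = TimeOut(90, interval=2, raise_on_timeout=True)
    try:
        while timeout.tick():
            if snap_exists():
                break
    except TimeOutError:
        # The VM may be down; take the snapshot ourselves.
        create_local_snapshot()
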
+ # XXX this will break if multiple servers are active + if not self.always_full and self.revision.repository.local_history: + keep_snapshot_revision = self.revision.repository.local_history[ + -1 + ].uuid else: - log.exception("failed") - sys.exit(1) + keep_snapshot_revision = None + for snapshot in self.rbd.snap_ls(self._image_name): + if not snapshot["name"].startswith("backy-"): + # Do not touch non-backy snapshots + continue + uuid = snapshot["name"].replace("backy-", "") + if uuid != keep_snapshot_revision: + time.sleep(3) # avoid race condition while unmapping + self.log.info( + "delete-old-snapshot", snapshot_name=snapshot["name"] + ) + try: + self.rbd.snap_rm(self._image_name + "@" + snapshot["name"]) + except Exception: + self.log.exception( + "delete-old-snapshot-failed", + snapshot_name=snapshot["name"], + ) + + +def main(): + sys.exit(RBDSource.main(*sys.argv)) diff --git a/src/backy/rbd/chunked/file.py b/src/backy/rbd/chunked/file.py index 24aa0a07..ced489e0 100644 --- a/src/backy/rbd/chunked/file.py +++ b/src/backy/rbd/chunked/file.py @@ -37,6 +37,7 @@ class File(object): stats: dict closed: bool size: int + mode: str _position: int _access_stats: dict[int, Tuple[int, float]] # (count, last) @@ -47,6 +48,7 @@ def __init__( self, name: str | os.PathLike, store: "Store", + mode: str = "rw", stats: Optional[dict] = None, ): self.name = str(name) @@ -59,6 +61,17 @@ def __init__( self._access_stats = defaultdict(lambda: (0, 0)) + self.mode = mode + + if "+" in self.mode: + self.mode += "w" + if "a" in self.mode: + self.mode += "w" + self.mode = "".join(set(self.mode)) + + if not os.path.exists(name) and "w" not in self.mode: + raise FileNotFoundError("File not found: {}".format(self.name)) + if not os.path.exists(name): self._mapping = {} self.size = 0 @@ -77,6 +90,9 @@ def __init__( self._mapping = {int(k): v for k, v in meta["mapping"].items()} self.size = meta["size"] + if "a" in self.mode: + self._position = self.size + # Chunks that we are working on. 
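
The mode handling added above gives File explicit read/write semantics:
"+" and "a" imply writability, append mode starts positioned at EOF, and
opening a missing file without "w" raises FileNotFoundError. A hedged usage
sketch, assuming an existing chunked `store` and that File is used as a
context manager, as the tests in this series do:

    with File(path, store, "wb") as f:  # create or overwrite
        f.write(b"data")
    with File(path, store, "rb") as f:  # read-only view
        assert f.readable() and not f.writable()
    File(missing, store, "rb")  # raises FileNotFoundError
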
self._chunks = {} @@ -104,7 +120,7 @@ def _flush_chunks(self, target: Optional[int] = None) -> None: self._chunks = dict(keep_chunks) def flush(self) -> None: - assert not self.closed + assert "w" in self.mode and not self.closed self._flush_chunks(0) @@ -115,14 +131,15 @@ def flush(self) -> None: def close(self) -> None: assert not self.closed - self.flush() + if "w" in self.mode: + self.flush() self.closed = True def isatty(self) -> bool: return False def readable(self) -> bool: - return not self.closed + return "r" in self.mode and not self.closed # def readline(size=-1) # def readlines(hint=-1) @@ -172,7 +189,7 @@ def seek(self, offset: int, whence=io.SEEK_SET) -> int: return position def truncate(self, size: Optional[int] = None) -> None: - assert not self.closed + assert "w" in self.mode and not self.closed if size is None: size = self._position # Update content hash @@ -186,7 +203,7 @@ def truncate(self, size: Optional[int] = None) -> None: self.flush() def read(self, size: int = -1) -> bytes: - assert not self.closed + assert "r" in self.mode and not self.closed result = io.BytesIO() max_size = self.size - self._position if size == -1: @@ -205,10 +222,10 @@ def read(self, size: int = -1) -> bytes: return result.getvalue() def writable(self) -> bool: - return not self.closed + return "w" in self.mode and not self.closed def write(self, data: bytes) -> None: - assert not self.closed + assert "w" in self.mode and not self.closed self.stats.setdefault("bytes_written", 0) self.stats["bytes_written"] += len(data) while data: diff --git a/src/backy/rbd/chunked/store.py b/src/backy/rbd/chunked/store.py index 123236e2..18d310bf 100644 --- a/src/backy/rbd/chunked/store.py +++ b/src/backy/rbd/chunked/store.py @@ -28,9 +28,10 @@ class Store(object): def __init__(self, path: Path, log: BoundLogger): self.path = path self.log = log.bind(subsystem="chunked-store") + self.path.mkdir(exist_ok=True) for x in range(256): subdir = self.path / f"{x:02x}" - subdir.mkdir(parents=True, exist_ok=True) + subdir.mkdir(exist_ok=True) if not self.path.joinpath("store").exists(): self.convert_to_v2() diff --git a/src/backy/rbd/source.py b/src/backy/rbd/source.py deleted file mode 100644 index 902fe77b..00000000 --- a/src/backy/rbd/source.py +++ /dev/null @@ -1,544 +0,0 @@ -import json -import os -import subprocess -import time -import uuid -from dataclasses import dataclass -from enum import Enum -from pathlib import Path -from typing import IO, Any, Literal, Optional, Set, cast - -import consulate -from structlog.stdlib import BoundLogger - -import backy -import backy.utils -from backy.ext_deps import BACKY_EXTRACT -from backy.report import ChunkMismatchReport -from backy.repository import Repository -from backy.revision import Revision, Trust -from backy.source import Source -from backy.utils import ( - CHUNK_SIZE, - END, - TimeOut, - TimeOutError, - copy, - posix_fadvise, - report_status, -) - -from .chunked import BackendException, Chunk, File, Hash, Store -from .rbd import RBDClient - - -def locked(target: str, mode: Literal["shared", "exclusive"]): - return Repository.locked(target, mode, repo_attr="repository") - - -class RestoreBackend(Enum): - AUTO = "auto" - PYTHON = "python" - RUST = "rust" - - def __str__(self): - return self.value - - -@dataclass(frozen=True) -class RestoreArgs: - target: str - backend: RestoreBackend = RestoreBackend.AUTO - - -class RBDSource(Source[RestoreArgs]): - type_ = "rbd" - subcommand = "backy-rbd" - - repository: Repository - ceph_rbd: "CephRBD" - store: Store - log: 
BoundLogger - - def __init__( - self, repository: Repository, ceph_rbd: "CephRBD", log: BoundLogger - ): - self.log = log.bind(subsystem="rbdsource") - self.repository = repository - self.ceph_rbd = ceph_rbd - self.store = Store(repository.path / "chunks", self.log) - - @classmethod - def from_config( - cls, repository: Repository, config: dict[str, Any], log: BoundLogger - ) -> "Source": - assert cls.type_ == config["type"] - return cls(repository, CephRBD.from_config(config, log), log) - - def _path_for_revision(self, revision: Revision) -> Path: - return self.repository.path / revision.uuid - - def open( - self, revision: Revision, parent: Optional[Revision] = None - ) -> File: - if parent and not self._path_for_revision(revision).exists(): - with self._path_for_revision(revision).open( - "wb" - ) as new, self._path_for_revision(parent).open("rb") as old: - # This is ok, this is just metadata, not the actual data. - new.write(old.read()) - file = File( - self._path_for_revision(revision), self.store, revision.stats - ) - - if file.writable() and self.repository.contains_distrusted: - # "Force write"-mode if any revision is distrusted. - self.log.warn("forcing-full") - self.store.force_writes = True - - return file - - ################# - # Making backups - - @locked(target=".backup", mode="exclusive") - @locked(target=".purge", mode="shared") - def backup(self, revision: Revision) -> bool: - self.repository.path.joinpath("last").unlink(missing_ok=True) - self.repository.path.joinpath("last.rev").unlink(missing_ok=True) - - start = time.time() - - if not self.ceph_rbd.ready(): - raise RuntimeError( - "Source is not ready (does it exist? can you access it?)" - ) - - try: - with self.ceph_rbd(revision) as source: - parent_rev = source.get_parent() - with self.open(revision, parent_rev) as file: - if parent_rev: - source.diff(file, parent_rev) - else: - source.full(file) - with self.open(revision) as file: - report = source.verify(file) - if report: - self.repository.add_report(report) - verified = not report - except BackendException: - self.log.exception("ceph-error-distrust-all") - verified = False - self.repository.distrust("local", skip_lock=True) - if not verified: - self.log.error( - "verification-failed", - revision_uuid=revision.uuid, - ) - revision.remove() - else: - self.log.info("verification-ok", revision_uuid=revision.uuid) - revision.stats["duration"] = time.time() - start - revision.write_info() - revision.readonly() - # Switched from a fine-grained syncing mechanism to "everything - # once" when we're done. This is as safe but much faster. - os.sync() - - # If there are distrusted revisions, then perform at least one - # verification after a backup - for good measure and to keep things - # moving along automatically. This could also be moved into the - # scheduler. - self.repository.scan() - # TODO: move this to cli/daemon? 
- for revision in reversed( - self.repository.get_history(clean=True, local=True) - ): - if revision.trust == Trust.DISTRUSTED: - self.log.warning("inconsistent") - self.verify(revision, skip_lock=True) - break - return verified - - @locked(target=".purge", mode="shared") - @report_status - def verify(self, revision: Revision): - log = self.log.bind(revision_uuid=revision.uuid) - log.info("verify-start") - verified_chunks: Set[Hash] = set() - - # Load verified chunks to avoid duplicate work - for verified_revision in self.repository.get_history( - clean=True, local=True - ): - if verified_revision.trust != Trust.VERIFIED: - continue - verified_chunks.update( - self.open(verified_revision)._mapping.values() - ) - - log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks)) - - errors = False - # Go through all chunks and check them. Delete problematic ones. - f = self.open(revision) - hashes = set(f._mapping.values()) - verified_chunks - yield len(hashes) + 2 - for candidate in hashes: - yield - if candidate in verified_chunks: - continue - try: - c = Chunk(self.store, candidate) - c._read_existing() - except Exception: - log.exception("verify-error", chunk=candidate) - errors = True - try: - self.store.chunk_path(candidate).unlink(missing_ok=True) - except Exception: - log.exception("verify-remove-error", chunk=candidate) - # This is an optimisation: we can skip this revision, purge it - # and then keep verifying other chunks. This avoids checking - # things unnecessarily in duplicate. - # And we only mark it as verified if we never saw any problems. - break - - yield - - # TODO: move this to cli/daemon? - if errors: - # Found any issues? Delete this revision as we can't trust it. - revision.remove() - else: - # No problems found - mark as verified. - revision.verify() - revision.write_info() - - yield - - # Purge to ensure that we don't leave unused, potentially untrusted - # stuff around, especially if this was the last revision. - self.gc(skip_lock=True) - - yield END - yield None - - @locked(target=".purge", mode="exclusive") - def gc(self) -> None: - self.log.debug("purge") - used_chunks: Set[Hash] = set() - # TODO: also remove mapping file - # TODO: purge quarantine store - for revision in self.repository.local_history: - used_chunks.update(self.open(revision)._mapping.values()) - self.store.purge(used_chunks) - # TODO: move this to cli/daemon? - self.repository.clear_purge_pending() - - ################# - # Restoring - - # This needs no locking as it's only a wrapper for restore_file and - # restore_stdout and locking isn't re-entrant. 
- def restore(self, revision: Revision, args: RestoreArgs) -> None: - s = self.open(revision) - restore_backend = args.backend - if restore_backend == RestoreBackend.AUTO: - if self.backy_extract_supported(s): - restore_backend = RestoreBackend.RUST - else: - restore_backend = RestoreBackend.PYTHON - self.log.info("restore-backend", backend=restore_backend.value) - if restore_backend == RestoreBackend.PYTHON: - with s as source: - if args.target != "-": - self.restore_file(source, args.target) - else: - self.restore_stdout(source) - elif restore_backend == RestoreBackend.RUST: - self.restore_backy_extract(revision, args.target) - - def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: - log = self.log.bind(subsystem="backy-extract") - if file.size % CHUNK_SIZE != 0: - log.debug("not-chunk-aligned") - return False - try: - version = subprocess.check_output( - [BACKY_EXTRACT, "--version"], - encoding="utf-8", - errors="replace", - ) - if not version.startswith("backy-extract"): - log.debug("unknown-version") - return False - except Exception: - log.debug("unavailable") - return False - return True - - # backy-extract acquires lock - def restore_backy_extract(self, rev: Revision, target: str) -> None: - log = self.log.bind(subsystem="backy-extract") - cmd = [BACKY_EXTRACT, str(self.repository.path / rev.uuid), target] - log.debug("started", cmd=cmd) - proc = subprocess.Popen(cmd) - return_code = proc.wait() - log.info( - "finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - if return_code: - raise RuntimeError( - f"backy-extract failed with return code {return_code}. " - "Maybe try `--backend python`?" - ) - - @locked(target=".purge", mode="shared") - def restore_file(self, source: IO, target_name: str) -> None: - """Bulk-copy from open revision `source` to target file.""" - self.log.debug("restore-file", source=source.name, target=target_name) - open(target_name, "ab").close() # touch into existence - with open(target_name, "r+b", buffering=CHUNK_SIZE) as target: - try: - posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore - except Exception: - pass - copy(source, target) - - @locked(target=".purge", mode="shared") - def restore_stdout(self, source: IO) -> None: - """Emit restore data to stdout (for pipe processing).""" - self.log.debug("restore-stdout", source=source.name) - try: - posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) # type: ignore - except Exception: - pass - with os.fdopen(os.dup(1), "wb") as target: - while True: - chunk = source.read(CHUNK_SIZE) - if not chunk: - break - target.write(chunk) - - -class CephRBD: - """The Ceph RBD source. - - Manages snapshots corresponding to revisions and provides a verification - that tries to balance reliability and performance. 
- """ - - pool: str - image: str - always_full: bool - vm: Optional[str] - consul_acl_token: Optional[str] - rbd: RBDClient - revision: Revision - log: BoundLogger - - snapshot_timeout = 90 - - def __init__( - self, - pool: str, - image: str, - log: BoundLogger, - vm: Optional[str] = None, - consul_acl_token: Optional[str] = None, - always_full: bool = False, - ): - self.pool = pool - self.image = image - self.always_full = always_full - self.vm = vm - self.consul_acl_token = consul_acl_token - self.log = log.bind(subsystem="ceph") - self.rbd = RBDClient(self.log) - - @classmethod - def from_config(cls, config: dict, log: BoundLogger) -> "CephRBD": - return cls( - config["pool"], - config["image"], - log, - config.get("vm"), - config.get("consul_acl_token"), - config.get("full-always", False), - ) - - def ready(self) -> bool: - """Check whether the source can be backed up. - - For RBD sources this means the volume exists and is accessible. - - """ - try: - if self.rbd.exists(self._image_name): - return True - except Exception: - self.log.exception("not-ready") - return False - - def __call__(self, revision): - self.revision = revision - return self - - def __enter__(self): - snapname = "backy-{}".format(self.revision.uuid) - self.create_snapshot(snapname) - return self - - def create_snapshot(self, name: str) -> None: - if not self.consul_acl_token or not self.vm: - self.rbd.snap_create(self._image_name + "@" + name) - return - - consul = consulate.Consul(token=self.consul_acl_token) - snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) - self.log.info( - "creating-snapshot", - snapshot_name=name, - snapshot_key=snapshot_key, - ) - - consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} - - time.sleep(3) - try: - timeout = TimeOut( - self.snapshot_timeout, interval=2, raise_on_timeout=True - ) - while timeout.tick(): - for snapshot in self.rbd.snap_ls(self._image_name): - if snapshot["name"] == name: - return - except TimeOutError: - # The VM might have been shut down. Try doing a regular Ceph - # snapshot locally. - self.rbd.snap_create(self._image_name + "@" + name) - except KeyboardInterrupt: - raise - finally: - # In case the snapshot still gets created: the general snapshot - # deletion code in ceph/source will clean up unused backy snapshots - # anyway. However, we need to work a little harder to delete old - # snapshot requests, otherwise we've sometimes seen those not - # getting deleted and then re-created all the time. - for key in list(consul.kv.find("snapshot/")): - try: - s = consul.kv[key] - except KeyError: - continue - try: - s = json.loads(s) - except json.decoder.JSONDecodeError: - # Clean up garbage. - self.log.warning( - "create-snapshot-removing-garbage-request", - snapshot_key=key, - ) - del consul.kv[key] - if s["vm"] != self.vm: - continue - # The knowledge about the `backy-` prefix isn't properly - # encapsulated here. 
- if s["snapshot"].startswith("backy-"): - self.log.info( - "create-snapshot-removing-request", - vm=s["vm"], - snapshot_name=s["snapshot"], - snapshot_key=key, - ) - del consul.kv[key] - - @property - def _image_name(self) -> str: - return "{}/{}".format(self.pool, self.image) - - def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): - self._delete_old_snapshots() - - def get_parent(self) -> Optional[Revision]: - if self.always_full: - self.log.info("backup-always-full") - return None - revision = self.revision - while True: - parent = revision.get_parent() - if not parent: - self.log.info("backup-no-valid-parent") - return None - if not self.rbd.exists(self._image_name + "@backy-" + parent.uuid): - self.log.info( - "ignoring-rev-without-snapshot", - revision_uuid=parent.uuid, - ) - revision = parent - continue - # Ok, it's trusted and we have a snapshot. Let's do a diff. - return parent - - def diff(self, target: File, parent: Revision) -> None: - self.log.info("diff") - snap_from = "backy-" + parent.uuid - snap_to = "backy-" + self.revision.uuid - s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) - with s as source: - source.integrate(target, snap_from, snap_to) - self.log.info("diff-integration-finished") - - def full(self, target: File) -> None: - self.log.info("full") - s = self.rbd.export( - "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) - ) - with s as source: - while buf := source.read(4 * backy.utils.MiB): - target.write(buf) - - def verify(self, target: File) -> Optional[ChunkMismatchReport]: - s = self.rbd.image_reader( - "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) - ) - self.revision.stats["ceph-verification"] = "partial" - - with s as source: - self.log.info("verify") - return backy.utils.files_are_roughly_equal(source, cast(IO, target)) - - def _delete_old_snapshots(self) -> None: - # Clean up all snapshots except the one for the most recent valid - # revision. - # Previously we used to remove all snapshots but the one for this - # revision - which is wrong: broken new revisions would always cause - # full backups instead of new deltas based on the most recent valid - # one. 
- # XXX this will break if multiple servers are active - if not self.always_full and self.revision.repository.local_history: - keep_snapshot_revision = self.revision.repository.local_history[ - -1 - ].uuid - else: - keep_snapshot_revision = None - for snapshot in self.rbd.snap_ls(self._image_name): - if not snapshot["name"].startswith("backy-"): - # Do not touch non-backy snapshots - continue - uuid = snapshot["name"].replace("backy-", "") - if uuid != keep_snapshot_revision: - time.sleep(3) # avoid race condition while unmapping - self.log.info( - "delete-old-snapshot", snapshot_name=snapshot["name"] - ) - try: - self.rbd.snap_rm(self._image_name + "@" + snapshot["name"]) - except Exception: - self.log.exception( - "delete-old-snapshot-failed", - snapshot_name=snapshot["name"], - ) diff --git a/src/backy/rbd/tests/conftest.py b/src/backy/rbd/tests/conftest.py index 47ee82e8..35cb80b0 100644 --- a/src/backy/rbd/tests/conftest.py +++ b/src/backy/rbd/tests/conftest.py @@ -5,24 +5,7 @@ import pytest import backy.rbd.rbd -from backy.rbd.source import RBDClient, RBDSource -from backy.repository import Repository -from backy.revision import Revision -from backy.schedule import Schedule - - -@pytest.fixture -def repository(tmp_path, log): - repo = Repository(tmp_path, RBDSource, Schedule(), log) - repo.connect() - return repo - - -def create_rev(repository, tags) -> Revision: - r = Revision.create(repository, tags, repository.log) - r.materialize() - repository.scan() - return repository.find_by_uuid(r.uuid) +from backy.rbd import RBDClient class CephCLIBase: diff --git a/src/backy/rbd/tests/test_ceph.py b/src/backy/rbd/tests/test_ceph.py index ee261aad..9febfca2 100644 --- a/src/backy/rbd/tests/test_ceph.py +++ b/src/backy/rbd/tests/test_ceph.py @@ -10,9 +10,8 @@ import pytest import backy.utils -from backy.rbd import RBDSource +from backy.rbd import CephRBD, RBDSource from backy.rbd.rbd import RBDDiffV1 -from backy.rbd.source import CephRBD from backy.revision import Revision BLOCK = backy.utils.PUNCH_SIZE @@ -172,7 +171,7 @@ def test_diff_backup(ceph_rbd, rbdsource, repository, tmp_path, log): ) revision.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - with rbdsource.open(parent) as f: + with rbdsource.open(parent, "wb") as f: f.write(b"asdf") repository.scan() @@ -189,7 +188,7 @@ def test_diff_backup(ceph_rbd, rbdsource, repository, tmp_path, log): export.return_value.__enter__.return_value = RBDDiffV1( io.BytesIO(SAMPLE_RBDDIFF) ) - with ceph_rbd(revision), rbdsource.open(revision) as f: + with ceph_rbd(revision), rbdsource.open(revision, "wb") as f: ceph_rbd.diff(f, revision.get_parent()) repository.history.append(revision) export.assert_called_with( @@ -213,7 +212,7 @@ def test_full_backup(ceph_rbd, rbdsource, repository, tmp_path, log): with mock.patch("backy.rbd.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han likes Leia.") - with ceph_rbd(revision), rbdsource.open(revision) as f: + with ceph_rbd(revision), rbdsource.open(revision, "wb") as f: ceph_rbd.full(f) export.assert_called_with("test/foo@backy-a0") @@ -231,7 +230,7 @@ def test_full_backup(ceph_rbd, rbdsource, repository, tmp_path, log): with mock.patch("backy.rbd.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(b"Han loves Leia.") - with ceph_rbd(revision2), rbdsource.open(revision2) as f: + with ceph_rbd(revision2), rbdsource.open(revision2, "wb") as f: ceph_rbd.full(f) with rbdsource.open(revision2) as f: @@ -264,7 +263,7 @@ def test_full_backup_integrates_changes( for 
content, rev in [(content0, rev0), (content1, rev1)]: with mock.patch("backy.rbd.rbd.RBDClient.export") as export: export.return_value = io.BytesIO(content) - with ceph_rbd(rev), rbdsource.open(rev) as target: + with ceph_rbd(rev), rbdsource.open(rev, "wb") as target: ceph_rbd.full(target) export.assert_called_with("test/foo@backy-{}".format(rev.uuid)) @@ -284,7 +283,7 @@ def test_verify_fail(ceph_rbd, rbdsource, repository, tmp_path, log): with open(rbd_source, "w") as f: f.write("Han likes Leia.") - with rbdsource.open(revision) as f: + with rbdsource.open(revision, "wb") as f: f.write(b"foobar") # The chunked store has false data, so this needs to be detected. with ceph_rbd(revision), rbdsource.open(revision) as target: @@ -304,7 +303,7 @@ def test_verify(ceph_rbd, rbdsource, repository, tmp_path, log): f.write(b"Han likes Leia.") ceph_rbd.rbd.unmap(rbd_source) - with rbdsource.open(revision) as f: + with rbdsource.open(revision, "wb") as f: f.write(b"Han likes Leia.") f.flush() diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py index 43930166..26b5ae69 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -1,32 +1,19 @@ -import datetime import os import pprint -import sys from functools import partialmethod import pytest import backy.rbd from backy import utils -from backy.rbd import main -from backy.repository import Repository +from backy.rbd import RBDSource from backy.revision import Revision -from backy.schedule import Schedule -from backy.source import Source +from backy.source import CmdLineSource from backy.tests import Ellipsis @pytest.fixture -def argv(): - original = sys.argv - new = original[:1] - sys.argv = new - yield new - sys.argv = original - - -@pytest.fixture -def repository_on_disk(tmp_path, log): +def source_on_disk(tmp_path, log): with open(tmp_path / "config", "w", encoding="utf-8") as f: f.write( f""" @@ -36,31 +23,22 @@ def repository_on_disk(tmp_path, log): daily: interval: 1d keep: 7 -type: rbd -""" - ) - with open(tmp_path / "source.config", "w", encoding="utf-8") as f: - f.write( - """ ---- -type: rbd -pool: a -image: b +source: + type: rbd + pool: a + image: b """ ) - repo = Repository(tmp_path, Source, Schedule(), log) - repo.connect() - return repo + return CmdLineSource.load(tmp_path, log).create_source() -def test_display_usage(capsys, argv): - with pytest.raises(SystemExit) as exit: - main() - assert exit.value.code == 0 +def test_display_usage(capsys): + exit = RBDSource.main("backy-rbd") + assert exit == 0 out, err = capsys.readouterr() assert ( """\ -usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] +usage: pytest [-h] [-v] [-C WORKDIR] [-t TASKID] {backup,restore,gc,verify} ... """ == out @@ -68,19 +46,19 @@ def test_display_usage(capsys, argv): assert err == "" -def test_display_help(capsys, argv): - argv.append("--help") +def test_display_help(capsys): with pytest.raises(SystemExit) as exit: - main() + RBDSource.main("backy-rbd", "--help") assert exit.value.code == 0 out, err = capsys.readouterr() assert ( Ellipsis( """\ -usage: pytest [-h] [-v] [-b BACKUPDIR] [-t TASKID] +usage: pytest [-h] [-v] [-C WORKDIR] [-t TASKID] {backup,restore,gc,verify} ... -Backup and restore for block devices. +The rbd plugin for backy. You should not call this directly. Use the backy +command instead. positional arguments: ... 
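
These tests lean on the new plugin CLI contract: Source.main() returns its
exit code instead of calling sys.exit() itself, so only the thin module
level `main()` wrapper terminates the process. A usage sketch with an
assumed repository directory:

    from backy.rbd import RBDSource

    # Returns 0 on success, 1 on failure; "-C" selects the workdir.
    code = RBDSource.main("backy-rbd", "-C", "/srv/backy/vm01", "gc")
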
@@ -91,13 +69,11 @@ def test_display_help(capsys, argv): assert err == "" -def test_verbose_logging(capsys, argv): +def test_verbose_logging(capsys): # This is just a smoke test to ensure the appropriate code path # for -v is covered. - argv.extend(["-v"]) - with pytest.raises(SystemExit) as exit: - main() - assert exit.value.code == 0 + exit = RBDSource.main("backy-rbd", "-v") + assert exit == 0 def print_args(*args, return_value=None, **kw): @@ -107,46 +83,41 @@ def print_args(*args, return_value=None, **kw): @pytest.mark.parametrize( - ["fun", "args", "rv", "rc", "params"], + ["args", "rv", "rc", "params"], [ ( - "backup", - ["asdf"], + ["backup", "asdf"], 0, 1, [""], ), ( - "backup", - ["asdf"], + ["backup", "asdf"], 1, 0, [""], ), ( - "restore", - ["asdf", "out.img"], + ["restore", "asdf", "out.img"], None, 0, [ "", - "RestoreArgs(target='out.img', backend=)", + "RBDRestoreArgs(target='out.img', backend=)", ], ), ( - "restore", - ["asdf", "--backend", "python", "out.img"], + ["restore", "asdf", "--backend", "python", "out.img"], None, 0, [ "", - "RestoreArgs(target='out.img', backend=)", + "RBDRestoreArgs(target='out.img', backend=)", ], ), - ("gc", [], None, 0, []), + (["gc"], None, 0, []), ( - "verify", - ["asdf"], + ["verify", "asdf"], None, 0, [""], @@ -154,36 +125,33 @@ def print_args(*args, return_value=None, **kw): ], ) def test_call_fun( - fun, args, rv, rc, params, - repository_on_disk, + source_on_disk, tmp_path, capsys, - argv, monkeypatch, log, ): os.chdir(tmp_path) - Revision(repository_on_disk, log, uuid="asdf").materialize() + Revision(source_on_disk.repository, log, uuid="asdf").materialize() monkeypatch.setattr( - backy.rbd.source.RBDSource, - fun, + backy.rbd.RBDSource, + args[0], partialmethod(print_args, return_value=rv), ) - argv.extend(["-v", fun, *args]) utils.log_data = "" - with pytest.raises(SystemExit) as exit: - main() + exit = RBDSource.main("backy-rbd", "-v", *args) + assert exit == rc out, err = capsys.readouterr() assert ( Ellipsis( f"""\ -{", ".join(["", *params])} +{", ".join(["", *params])} {{}} """ ) @@ -192,20 +160,17 @@ def test_call_fun( assert ( Ellipsis( f"""\ -... D - command/invoked args='... -v {" ".join([fun, *args])}' +... D - command/invoked args='backy-rbd -v {" ".join([ *args])}' ... D - repo/scan-reports entries=0 -... I - chunked-store/to-v2 \n\ -... I - chunked-store/to-v2-finished \n\ ... D - command/return-code code={rc} """ ) == utils.log_data ) - assert exit.value.code == rc def test_call_unexpected_exception( - capsys, repository_on_disk, argv, monkeypatch, log, tmp_path + capsys, source_on_disk, monkeypatch, log, tmp_path ): def do_raise(*args, **kw): raise RuntimeError("test") @@ -215,23 +180,21 @@ def do_raise(*args, **kw): monkeypatch.setattr(os, "_exit", lambda x: None) - argv.extend(["-b", str(repository_on_disk.path), "gc"]) utils.log_data = "" - with pytest.raises(SystemExit): - main() + exit = RBDSource.main( + "backy-rbd", "-C", str(source_on_disk.repository.path), "gc" + ) + assert exit == 1 out, err = capsys.readouterr() - print(utils.log_data) assert "" == out assert ( Ellipsis( """\ -... D - command/invoked args='... -b ... gc' +... D - command/invoked args='backy-rbd -C ... gc' ... D - repo/scan-reports entries=0 -... I - chunked-store/to-v2 \n\ -... I - chunked-store/to-v2-finished \n\ ... 
E - command/failed exception_class='builtins.RuntimeError' exception_msg='test' exception>\tTraceback (most recent call last): -exception>\t File ".../src/backy/rbd/__init__.py", line ..., in main +exception>\t File ".../src/backy/source.py", line ..., in main exception>\t source.gc() exception>\t File ".../src/backy/rbd/tests/test_main.py", line ..., in do_raise exception>\t raise RuntimeError("test") diff --git a/src/backy/rbd/tests/test_rbd.py b/src/backy/rbd/tests/test_rbd.py index 2c9756e4..fb80819f 100644 --- a/src/backy/rbd/tests/test_rbd.py +++ b/src/backy/rbd/tests/test_rbd.py @@ -4,8 +4,8 @@ import pytest from backy.ext_deps import RBD +from backy.rbd import RBDClient from backy.rbd.rbd import RBDDiffV1 -from backy.rbd.source import RBDClient @mock.patch("subprocess.check_output") diff --git a/src/backy/rbd/tests/test_source.py b/src/backy/rbd/tests/test_source.py index 94a64061..d41308e5 100644 --- a/src/backy/rbd/tests/test_source.py +++ b/src/backy/rbd/tests/test_source.py @@ -1,4 +1,3 @@ -import json import os import subprocess from pathlib import Path @@ -7,10 +6,10 @@ import pytest +from backy.conftest import create_rev from backy.ext_deps import BACKY_RBD_CMD, BASH -from backy.rbd import RBDSource, RestoreArgs -from backy.rbd.source import CephRBD -from backy.rbd.tests.conftest import create_rev +from backy.rbd import CephRBD, RBDRestoreArgs, RBDSource +from backy.source import CmdLineSource from backy.tests import Ellipsis from backy.utils import CHUNK_SIZE @@ -50,16 +49,16 @@ def rbdsource(repository, log): def test_configure_rbd_source_no_consul(repository, tmp_path, log): - with open(tmp_path / "source.config", "w", encoding="utf-8") as f: - json.dump( - { - "type": "rbd", - "pool": "test", - "image": "test04.root", - }, - f, - ) - source = repository.get_source() + config = { + "path": str(tmp_path), + "schedule": {}, + "source": { + "type": "rbd", + "pool": "test", + "image": "test04.root", + }, + } + source = CmdLineSource.from_config(config, log).create_source() assert isinstance(source, RBDSource) ceph_rbd = source.ceph_rbd assert isinstance(ceph_rbd, CephRBD) @@ -71,19 +70,19 @@ def test_configure_rbd_source_no_consul(repository, tmp_path, log): def test_configure_rbd_source_consul(repository, tmp_path, log): - with open(tmp_path / "source.config", "w", encoding="utf-8") as f: - json.dump( - { - "type": "rbd", - "pool": "test", - "image": "test04.root", - "full-always": True, - "vm": "test04", - "consul_acl_token": "token", - }, - f, - ) - source = repository.get_source() + config = { + "path": str(tmp_path), + "schedule": {}, + "source": { + "type": "rbd", + "pool": "test", + "image": "test04.root", + "full-always": True, + "vm": "test04", + "consul_acl_token": "token", + }, + } + source = CmdLineSource.from_config(config, log).create_source() assert isinstance(source, RBDSource) ceph_rbd = source.ceph_rbd assert isinstance(ceph_rbd, CephRBD) @@ -100,7 +99,7 @@ def test_restore_target(rbdsource, repository, tmp_path, log): target = tmp_path / "restore.img" r = create_rev(repository, {"daily"}) rbdsource.backup(r) - rbdsource.restore(r, RestoreArgs(str(target))) + rbdsource.restore(r, RBDRestoreArgs(str(target))) with open(target, "rb") as t: assert data == t.read() @@ -110,7 +109,7 @@ def test_restore_stdout(rbdsource, repository, capfd, log): rbdsource.ceph_rbd.data = data r = create_rev(repository, {"daily"}) rbdsource.backup(r) - rbdsource.restore(r, RestoreArgs("-")) + rbdsource.restore(r, RBDRestoreArgs("-")) assert not Path("-").exists() out, err = 
capfd.readouterr() assert data.decode("utf-8") == out @@ -124,7 +123,7 @@ def test_restore_backy_extract(rbdsource, repository, monkeypatch, log): rbdsource.ceph_rbd.data = data r = create_rev(repository, {"daily"}) rbdsource.backup(r) - rbdsource.restore(r, RestoreArgs("restore.img")) + rbdsource.restore(r, RBDRestoreArgs("restore.img")) check_output.assert_called() rbdsource.restore_backy_extract.assert_called_once_with(r, "restore.img") @@ -149,7 +148,7 @@ def test_backup_corrupted(rbdsource, repository, log): def test_gc(rbdsource, repository, log): r = create_rev(repository, set()) # Write 1 version to the file - with rbdsource.open(r) as f: + with rbdsource.open(r, "wb") as f: f.write(b"asdf") remote = create_rev(repository, set()) # remote revision without local data remote.server = "remote" @@ -178,7 +177,7 @@ def test_smoketest_internal(rbdsource, repository, tmp_path, log): rbdsource.backup(rev1) # Restore first state from the newest revision - restore_args = RestoreArgs(str(tmp_path / "image1.restore")) + restore_args = RBDRestoreArgs(str(tmp_path / "image1.restore")) rbdsource.restore(rev1, restore_args) with pytest.raises(IOError): open(repository.history[-1].info_filename, "wb") diff --git a/src/backy/repository.py b/src/backy/repository.py index 97456222..bdee86cd 100644 --- a/src/backy/repository.py +++ b/src/backy/repository.py @@ -7,11 +7,8 @@ from typing import IO, Any, List, Literal, Optional, TypedDict import tzlocal -import yaml from structlog.stdlib import BoundLogger -import backy -import backy.source from backy.utils import ( duplicates, list_get, @@ -24,7 +21,6 @@ from .report import ProblemReport from .revision import Revision, Trust, filter_schedule_tags from .schedule import Schedule -from .source import Source class StatusDict(TypedDict): @@ -38,7 +34,7 @@ class StatusDict(TypedDict): next_time: Optional[datetime.datetime] next_tags: Optional[str] manual_tags: str - problem_reports: List[str] + problem_reports: int unsynced_revs: int local_revs: int @@ -60,7 +56,6 @@ class Repository(object): path: Path report_path: Path - sourcetype: type[backy.source.Source] schedule: Schedule history: List[Revision] report_ids: List[str] @@ -72,63 +67,24 @@ class Repository(object): def __init__( self, path: Path, - sourcetype: type[backy.source.Source], schedule: Schedule, log: BoundLogger, ): self.path = path.resolve() self.report_path = self.path / "quarantine" self.schedule = schedule - self.sourcetype = sourcetype self.log = log.bind(subsystem="repo") self._lock_fds = {} def connect(self): - self.path.mkdir(parents=True, exist_ok=True) + self.path.mkdir(exist_ok=True) + self.report_path.mkdir(exist_ok=True) self.scan() self.scan_reports() - def get_source(self): - return self.sourcetype.from_repo(self) - - @staticmethod - def from_config(config: dict[str, Any], log: BoundLogger) -> "Repository": - schedule = Schedule() - schedule.configure(config["schedule"]) - try: - sourcetype = backy.source.factory_by_type(config["type"]) - except KeyError: - log.error( - "unknown-source-type", - _fmt_msg="Unknown source type '{type}'. 
You will be limited to metadata only operations...", - type=config["type"], - ) - sourcetype = Source[None] - - return Repository(Path(config["path"]), sourcetype, schedule, log) - - @classmethod - def load(cls, path: Path, log: BoundLogger) -> "Repository": - try: - with path.joinpath("config").open(encoding="utf-8") as f: - config = yaml.safe_load(f) - return cls.from_config(config, log) - except IOError: - log.error( - "could-not-read-config", - _fmt_msg="Could not read config file. Is the path correct?", - config_path=str(path / "config"), - ) - raise - - def store(self) -> None: - with self.path.joinpath("config").open(encoding="utf-8") as f: - yaml.safe_dump(self.to_dict(), f) - def to_dict(self) -> dict[str, Any]: return { "schedule": self.schedule.to_dict(), - "type": self.sourcetype.type_, "path": str(self.path), } diff --git a/src/backy/revision.py b/src/backy/revision.py index ba531a64..9d59e1d3 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -147,9 +147,9 @@ def remove(self, force=False) -> None: self.write_info() else: if self.info_filename.exists(): - self.log.debug("remove-start", filename=self.info_filename) + self.log.debug("remove-start", filename=str(self.info_filename)) self.info_filename.unlink() - self.log.debug("remove-end", filename=self.info_filename) + self.log.debug("remove-end", filename=str(self.info_filename)) if self in self.repository.history: self.repository.history.remove(self) diff --git a/src/backy/source.py b/src/backy/source.py index 2b5066ac..6ddbf2ee 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -1,13 +1,24 @@ +import argparse +import asyncio +import errno +import filecmp +import subprocess from abc import ABC, abstractmethod +from argparse import ArgumentParser, Namespace +from dataclasses import dataclass from importlib.metadata import entry_points -from typing import TYPE_CHECKING, Any, Generic, TypeVar +from pathlib import Path +from typing import Any, Generic, Iterable, Optional, TypeVar, cast +import structlog import yaml from structlog.stdlib import BoundLogger -if TYPE_CHECKING: - from backy.repository import Repository - from backy.revision import Revision +from backy import logging +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule +from backy.utils import SafeFile, generate_taskid SOURCE_PLUGINS = entry_points(group="backy.sources") @@ -16,10 +27,31 @@ def factory_by_type(type_) -> type["Source"]: return SOURCE_PLUGINS[type_].load() -RestoreArgsType = TypeVar("RestoreArgsType") +RestoreArgsType = TypeVar("RestoreArgsType", bound="RestoreArgs") +SourceType = TypeVar("SourceType", bound="Source") -class Source(Generic[RestoreArgsType]): + +@dataclass(frozen=True) +class RestoreArgs(ABC): + @abstractmethod + def to_cmdargs(self) -> Iterable[str]: + ... + + @classmethod + @abstractmethod + def setup_argparse(cls, restore_parser: ArgumentParser) -> None: + ... + + @classmethod + @abstractmethod + def from_args( + cls: type[RestoreArgsType], args: Namespace + ) -> RestoreArgsType: + ... + + +class Source(ABC, Generic[RestoreArgsType]): """A source provides specific implementations for making and restoring backups. 
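
Source implementations are discovered through the "backy.sources"
entry-point group, so a plugin is resolved from the `type` string in the
config. A small sketch, assuming the file plugin from this series is
installed and registered under "file":

    from backy.source import factory_by_type

    source_cls = factory_by_type("file")  # entry-point lookup
    assert source_cls.type_ == "file"
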
@@ -52,43 +84,313 @@ class Source(Generic[RestoreArgsType]): """ - type_: str = "" - subcommand: str + type_: str + restore_type: type[RestoreArgsType] + repository: "Repository" + + def __init__(self, repository: "Repository"): + self.repository = repository + + @classmethod + @abstractmethod + def from_config( + cls: type[SourceType], + repository: "Repository", + config: dict[str, Any], + log: BoundLogger, + ) -> SourceType: + ... + + # @abstractmethod + # def to_config(self) -> dict[str, Any]: + # ... + + @abstractmethod + def backup(self, revision: "Revision") -> bool: + ... + + @abstractmethod + def restore(self, revision: "Revision", args: RestoreArgsType): + ... + + @abstractmethod + def verify(self, revision: "Revision"): + ... + + @abstractmethod + def gc(self) -> None: + ... + + @classmethod + def create_argparse(cls) -> ArgumentParser: + parser = argparse.ArgumentParser( + description=f"The {cls.type_} plugin for backy. You should not call this directly. Use the backy command instead.", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose output" + ) + # parser.add_argument( + # "-c", + # "--config", + # type=Path, + # default="/etc/backy.conf", + # help="(default: %(default)s)", + # ) + parser.add_argument( + "-C", + dest="workdir", + default=".", + type=Path, + help=( + "Run as if backy was started in instead of the current " + "working directory." + ), + ) + parser.add_argument( + "-t", + "--taskid", + default=generate_taskid(), + help="ID to include in log messages (default: 4 random base32 chars)", + ) + + subparsers = parser.add_subparsers() + + # BACKUP + p = subparsers.add_parser( + "backup", + help="Perform a backup", + ) + p.set_defaults(func="backup") + # TODO: decide if the rev should be created + p.add_argument("revision", help="Revision to create.") + + # RESTORE + p = subparsers.add_parser( + "restore", + help="Restore (a given revision) to a given target", + ) + p.add_argument("revision", help="Revision to restore.") + cls.restore_type.setup_argparse(p) + p.set_defaults(func="restore") + + # GC + p = subparsers.add_parser( + "gc", + help="Remove unused data from the repository.", + ) + p.set_defaults(func="gc") + + # VERIFY + p = subparsers.add_parser( + "verify", + help="Verify specified revision", + ) + p.add_argument("revision", help="Revision to work on.") + p.set_defaults(func="verify") + + return parser @classmethod - def from_repo(cls, repository: "Repository"): - assert ( - repository.sourcetype == cls - ), f"this repo requires a {repository.sourcetype.type_} source and not a {cls.type_} source" - path = repository.path.joinpath(f"source.config") + def main(cls, *str_args: str) -> int: + parser = cls.create_argparse() + + args = parser.parse_args(str_args[1:]) + + if not hasattr(args, "func"): + parser.print_usage() + return 0 + + # Logging + logging.init_logging( + args.verbose, + args.workdir / "backy.log", + defaults={"taskid": args.taskid}, + ) + log = structlog.stdlib.get_logger(subsystem="command") + log.debug("invoked", args=" ".join(str_args)) + + try: + source = CmdLineSource.load(args.workdir, log).create_source(cls) + + ret = 0 + match args.func: + case "backup": + rev = source.repository.find_by_uuid(args.revision) + success = source.backup(rev) + ret = int(not success) + case "restore": + rev = source.repository.find_by_uuid(args.revision) + source.restore(rev, cls.restore_type.from_args(args)) + case "gc": + source.gc() + case "verify": + rev = source.repository.find_by_uuid(args.revision) + source.verify(rev) 
+ case _: + raise ValueError("invalid function: " + args.fun) + log.debug("return-code", code=ret) + return ret + except Exception as e: + if isinstance(e, IOError) and e.errno in [ + errno.EDEADLK, + errno.EAGAIN, + ]: + log.warning("repo-currently-locked") + else: + log.exception("failed") + return 1 + + +class CmdLineSource: + repository: "Repository" + source_conf: dict[str, Any] + log: BoundLogger + + @property + def type_(self): + return self.source_conf["type"] + + @property + def subcommand(self) -> str: + return "backy-" + self.type_ + + @property + def taskid(self): + return self.log._context.get( + "subtaskid", self.log._context.get("taskid", generate_taskid()) + ) + + def __init__( + self, + repository: "Repository", + source_conf: dict[str, Any], + log: BoundLogger, + ): + self.repository = repository + self.source_conf = source_conf + self.log = log.bind(subsystem="cmdlinesource") + + @classmethod + def from_config( + cls, config: dict[str, Any], log: BoundLogger + ) -> "CmdLineSource": + schedule = Schedule() + schedule.configure(config["schedule"]) + repo = Repository(Path(config["path"]), schedule, log) + repo.connect() + return cls(repo, config["source"], log) + + @classmethod + def load(cls, path: Path, log: BoundLogger) -> "CmdLineSource": + path = path / "config" try: with path.open(encoding="utf-8") as f: config = yaml.safe_load(f) + return cls.from_config(config, log) except IOError: - repository.log.error( - "could-not-read-source-config", + log.error( + "source-config-error", _fmt_msg="Could not read source config file. Is the path correct?", config_path=str(path), ) raise - return cls.from_config(repository, config, repository.log) + def to_config(self) -> dict[str, Any]: + return { + "path": str(self.repository.path), + "source": self.source_conf, + "schedule": self.repository.schedule.config, + } - @classmethod - @abstractmethod - def from_config( - cls, repository: "Repository", config: dict[str, Any], log: BoundLogger - ) -> "Source": - ... + def store(self) -> None: + """Writes config file for 'backy-' subprocess.""" - @abstractmethod - def to_config(self) -> dict[str, Any]: - ... + # We do not want to create leading directories, only + # the backup directory itself. If the base directory + # does not exist then we likely don't have a correctly + # configured environment. + self.repository.path.mkdir(exist_ok=True) + config = self.repository.path / "config" + with SafeFile(config, encoding="utf-8") as f: + f.open_new("wb") + yaml.safe_dump(self.to_config(), f) + if config.exists() and filecmp.cmp(config, f.name): + raise ValueError("not changed") - @abstractmethod - def backup(self, revision: "Revision") -> "Source": - ... 
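
CmdLineSource ties a Repository to its source config and either builds the
plugin in-process via create_source() (defined just below) or shells out to
the matching `backy-<type>` subcommand through run(). A hedged sketch
mirroring the test fixtures earlier in this patch; `log` is assumed to be a
structlog BoundLogger and the parent of `path` to exist:

    conf = {
        "path": "/srv/backy/vm01",
        "schedule": {"daily": {"interval": "1d", "keep": 7}},
        "source": {"type": "rbd", "pool": "rbd", "image": "vm01.root"},
    }
    cmdline = CmdLineSource.from_config(conf, log)
    source = cmdline.create_source()  # in-process RBDSource instance
    cmdline.gc()  # runs `backy-rbd -t <taskid> -C /srv/backy/vm01 gc`
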
+ def create_source( + self, sourcetype: Optional[type[SourceType]] = None + ) -> SourceType: + if sourcetype: + sourcetype_ = sourcetype + else: + try: + sourcetype_ = cast( + type[SourceType], factory_by_type(self.type_) + ) + except KeyError: + self.log.error( + "unknown-source-type", + _fmt_msg="Unknown source type '{type}'.", + type=self.type_, + ) + raise + + return sourcetype_.from_config( + self.repository, self.source_conf, self.log + ) + + def run(self, *args): + return self.invoke( + self.subcommand, + "-t", + self.taskid, + "-C", + str(self.repository.path), + *args, + ) + + def invoke(self, *args): + self.log.info("run", cmd=" ".join(args)) + proc = subprocess.run(args) + self.log.debug("run-finished", return_code=proc.returncode) + return proc.returncode + + def backup(self, revision: "Revision"): + return self.run("backup", revision.uuid) - @abstractmethod def restore(self, revision: "Revision", args: RestoreArgsType): - ... + return self.run("restore", revision.uuid, *args.to_cmdargs()) + + def verify(self, revision: "Revision"): + return self.run("verify", revision.uuid) + + def gc(self): + return self.run("gc") + + +class AsyncCmdLineSource(CmdLineSource): + async def invoke(self, *args): + self.log.info("run", cmd=" ".join(args)) + proc = await asyncio.create_subprocess_exec( + *args, + start_new_session=True, # Avoid signal propagation like Ctrl-C. + close_fds=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + try: + return_code = await proc.wait() + self.log.debug( + "run-finished", + return_code=return_code, + subprocess_pid=proc.pid, + ) + return return_code + except asyncio.CancelledError: + self.log.warning("run-cancelled", subprocess_pid=proc.pid) + try: + proc.terminate() + except ProcessLookupError: + pass + raise diff --git a/src/backy/tests/conftest.py b/src/backy/tests/conftest.py deleted file mode 100644 index f52ab804..00000000 --- a/src/backy/tests/conftest.py +++ /dev/null @@ -1,12 +0,0 @@ -import json - -import pytest - -from backy.repository import Repository - - -@pytest.fixture -def repository(schedule, tmp_path, log): - with open(str(tmp_path / "config"), "w", encoding="utf-8") as f: - json.dump({"schedule": schedule.to_dict()}, f) - return Repository(tmp_path, log) diff --git a/src/backy/tests/test_backup.py b/src/backy/tests/test_backup.py index a2ae1b5f..966eb0e0 100644 --- a/src/backy/tests/test_backup.py +++ b/src/backy/tests/test_backup.py @@ -198,11 +198,11 @@ def test_ignore_duplicates(repository_with_revisions, tmp_path): assert 3 == len(a.history) -def test_find(repository, tmp_path, log): +def test_find(repository, log): rev = Revision.create(repository, set(), log, uuid="123-456") rev.materialize() repository.scan() - assert tmp_path / "123-456" == repository.find("0").filename + assert "123-456" == repository.find("0").uuid def test_find_should_raise_if_not_found(repository, log): diff --git a/src/backy/tests/test_report.py b/src/backy/tests/test_report.py index dec87946..f7d82378 100644 --- a/src/backy/tests/test_report.py +++ b/src/backy/tests/test_report.py @@ -1,22 +1,25 @@ +import pytest + from backy.report import ChunkMismatchReport -from backy.repository import Repository from backy.tests import Ellipsis -def test_quarantine(tmp_path, log, clock): - repo = Repository() - repo.add_report(ChunkMismatchReport(b"source", b"target", 3)) +@pytest.mark.skip +def test_quarantine(tmp_path, repository, log, clock): + repository.add_report(ChunkMismatchReport(b"source", b"target", 3)) with open( 
- (tmp_path / "quarantine" / repo.report_ids[0]).with_suffix(".report") + (tmp_path / "quarantine" / repository.report_ids[0]).with_suffix( + ".report" + ) ) as report: assert ( Ellipsis( f"""\ -uuid: {repo.report_ids[0]} +uuid: {repository.report_ids[0]} +timestamp: 2015-09-01 07:06:47+00:00 source_hash: 36cd38f49b9afa08222c0dc9ebfe35eb target_hash: 42aefbae01d2dfd981f7da7d823d689e offset: 3 -timestamp: 2015-09-01 07:06:47+00:00 traceback: |- ... File ".../src/backy/rbd/tests/test_quarantine.py", line ..., in test_quarantine diff --git a/src/backy/tests/test_revision.py b/src/backy/tests/test_revision.py index 5faf1a16..dd7b1407 100644 --- a/src/backy/tests/test_revision.py +++ b/src/backy/tests/test_revision.py @@ -56,7 +56,6 @@ def test_filenames_based_on_uuid_and_backup_dir(log): backup = mock.Mock() backup.path = Path("/srv/backup/foo") r = Revision.create(backup, set(), log, uuid="asdf") - assert r.filename == Path("/srv/backup/foo/asdf") assert r.info_filename == Path("/srv/backup/foo/asdf.rev") @@ -97,11 +96,5 @@ def test_delete_revision(repository, log): r = Revision.create(repository, set(), log, uuid="123-456") r.materialize() assert repository.path.joinpath("123-456.rev").exists() - repository.scan() - repository.path.joinpath("123-456").open("w") - assert repository.path.joinpath("123-456.rev").exists() r.remove() - # Ensure the revision data file exists - we do not implicitly create - # it any longer. - assert not repository.path.joinpath("123-456").exists() assert not repository.path.joinpath("123-456.rev").exists() diff --git a/src/backy/tests/test_schedule.py b/src/backy/tests/test_schedule.py index 8ee3a093..01de3a00 100644 --- a/src/backy/tests/test_schedule.py +++ b/src/backy/tests/test_schedule.py @@ -187,12 +187,12 @@ def add_revision(timestamp): # This revision is more than keep and also outside the interval. # It gets its tag removed and disappears. r = add_revision(datetime(2014, 5, 4, 11, 0, tzinfo=UTC)) - assert r.filename.with_suffix(".rev").exists() + assert r.info_filename.exists() removed = [x for x in schedule.expire(repository)] assert [r.uuid] == [x.uuid for x in removed] repository.scan() assert [{"daily"}] * 6 == [rev.tags for rev in repository.history] - assert not r.filename.with_suffix(".rev").exists() + assert not r.info_filename.exists() # If we have manual tags, then those do not expire. 
However, the # known and unknown tag disappear but then the file remains @@ -200,14 +200,14 @@ def add_revision(timestamp): r = add_revision(datetime(2014, 5, 4, 11, 0, tzinfo=UTC)) r.tags = {"daily", "manual:test", "unknown"} r.write_info() - assert r.filename.with_suffix(".rev").exists() + assert r.info_filename.exists() expired = schedule.expire(repository) assert [] == [x.uuid for x in expired] repository.scan() assert [{"manual:test"}] + [{"daily"}] * 6 == [ rev.tags for rev in repository.history ] - assert r.filename.with_suffix(".rev").exists() + assert r.info_filename.exists() def test_next_in_interval(clock): diff --git a/src/backy/tests/test_utils.py b/src/backy/tests/test_utils.py index 14c2b1ef..47738d2a 100644 --- a/src/backy/tests/test_utils.py +++ b/src/backy/tests/test_utils.py @@ -272,7 +272,7 @@ def test_roughly_compare_files_same(tmp_path): f.write(b"asdf" * 100) for x in range(20): - assert files_are_roughly_equal( + assert not files_are_roughly_equal( open("a", "rb"), open("b", "rb"), blocksize=10 ) @@ -288,7 +288,7 @@ def test_roughly_compare_files_1_changed_block(tmp_path): detected = 0 for x in range(20): - detected += files_are_roughly_equal( + detected += not files_are_roughly_equal( open("a", "rb"), open("b", "rb"), blocksize=10 ) @@ -304,9 +304,11 @@ def test_roughly_compare_files_timeout(tmp_path): # The files are different but we don't notice as we run into a timeout. # That's fine. - assert files_are_roughly_equal(open("a", "rb"), open("b", "rb"), timeout=0) + assert not files_are_roughly_equal( + open("a", "rb"), open("b", "rb"), timeout=0 + ) # Without the timeout we do notice - assert not files_are_roughly_equal(open("a", "rb"), open("b", "rb")) + assert files_are_roughly_equal(open("a", "rb"), open("b", "rb")) def test_copy_overwrite_correctly_makes_sparse_file(tmp_path): From abccd826180e206ca4447e1ccbaf5d6a407147ab Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Sat, 10 Aug 2024 14:34:53 +0200 Subject: [PATCH 22/25] snapshot: fix all tests --- src/backy/cli/__init__.py | 693 +++++++++++++++----------- src/backy/cli/tests/test_client.py | 188 +++---- src/backy/cli/tests/test_main.py | 680 ++++++++++--------------- src/backy/conftest.py | 10 + src/backy/daemon/__init__.py | 4 +- src/backy/daemon/api.py | 13 +- src/backy/daemon/scheduler.py | 26 +- src/backy/daemon/tests/test_daemon.py | 61 ++- src/backy/file/__init__.py | 8 +- src/backy/logging.py | 1 - src/backy/rbd/__init__.py | 35 +- src/backy/rbd/tests/test_ceph.py | 9 +- src/backy/rbd/tests/test_main.py | 26 +- src/backy/rbd/tests/test_source.py | 4 +- src/backy/report.py | 72 +-- src/backy/repository.py | 42 +- src/backy/revision.py | 1 + src/backy/s3/__init__.py | 53 ++ src/backy/schedule.py | 6 + src/backy/source.py | 21 +- src/backy/tests/test_report.py | 11 +- src/backy/tests/test_utils.py | 10 +- src/backy/utils.py | 37 +- 23 files changed, 1064 insertions(+), 947 deletions(-) diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py index bf0aabe5..48c1d72e 100644 --- a/src/backy/cli/__init__.py +++ b/src/backy/cli/__init__.py @@ -1,15 +1,15 @@ import argparse import asyncio +import inspect import re -import subprocess import sys +from functools import cached_property from pathlib import Path -from typing import Any, Dict, Literal, Optional +from typing import Any, Dict, List, Literal, Optional import humanize import structlog import tzlocal -import yaml from aiohttp import ClientResponseError from aiohttp.web_exceptions import HTTPNotFound from rich import print as rprint @@ -21,50 
+21,44 @@ from backy import logging from backy.daemon import BackyDaemon from backy.daemon.api import Client -from backy.rbd import RestoreBackend - -# XXX invert this dependency from backy.repository import Repository -from backy.revision import Revision +from backy.revision import Revision, filter_manual_tags from backy.schedule import Schedule -from backy.utils import format_datetime_local, generate_taskid +from backy.source import SOURCE_PLUGINS, CmdLineSource +from backy.utils import BackyJSONEncoder, format_datetime_local, generate_taskid # single repo commands # (init) -# rev-parse Print full path or uuid of specified revisions +# rev-parse (job?, rev) Print full path or uuid of specified revisions -# log [--filter] (status) Show backup status. Show inventory and summary information +# log [--filter] (status) (rev) Show backup status. Show inventory and summary information -# backup [--fg] (remote) Perform a backup -# restore (remote) Restore (a given revision) to a given target +# backup [--bg] (job) Perform a backup +# restore Restore (a given revision) to a given target -# distrust Distrust specified revisions -# verify (remote) Verify specified revisions -# rm Forget specified revision -# tag Modify tags on revision +# distrust (job, rev) Distrust specified revisions +# verify (job, rev) Verify specified revisions +# rm (job, rev) Forget specified revision +# tag (job, rev) Modify tags on revision -# gc [--expire] [--remote|--local] (Expire revisions) and collect garbage from the repository. +# gc [--expire] [--remote|--local] (job) (Expire revisions) and collect garbage from the repository. -# pull? update metadata from all known remotes that host backups +# pull? (job) update metadata from all known remotes that host backups # for the same backup source -# check + +# reports list/show/delete # # multi-repo / daemon-based commands -# show-jobs List status of all known jobs +# check (job) +# show-jobs (job def: all) List status of all known jobs (integrated with log?) # show-daemon Daemon status - -# pull - -# backup --all [--include=filter] [--exclude=filter] [--fg] - -# check --all - +# reload # maybe add a common --repo/--job flag? 
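As a quick orientation for the hunk below: Command dispatches subcommands by inspecting their signatures and fans out over all selected repositories whenever a method declares a repo parameter. A minimal, self-contained sketch of that idea (hypothetical stand-in names; the real __call__ below additionally awaits coroutines and resolves the --jobs selection through the daemon API):

    import inspect

    class Repository:  # stand-in for backy.repository.Repository
        def __init__(self, name):
            self.name = name

    class Command:
        def __init__(self, repos):
            self.repos = repos

        def check(self, repo: Repository):  # annotated "repo" => fan out
            print("checking", repo.name)
            return 0

        def show_daemon(self):  # no "repo" parameter => run once
            print("daemon status")
            return 0

        def __call__(self, cmdname, kwargs):
            func = getattr(self, cmdname)
            params = inspect.signature(func).parameters
            if "repo" in params and params["repo"].annotation is Repository:
                # run once per selected repository, keep the worst exit code
                return max(func(repo=r, **kwargs) or 0 for r in self.repos)
            return func(**kwargs) or 0

    cmd = Command([Repository("test01"), Repository("foo00")])
    assert cmd("check", {}) == 0 and cmd("show_daemon", {}) == 0
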
@@ -74,51 +68,114 @@ class Command(object): path: Path config: Path - taskid: str + dry_run: bool + jobs: Optional[str] log: BoundLogger - def __init__(self, path: Path, config: Path, taskid, log: BoundLogger): + def __init__( + self, + path: Path, + config: Path, + dry_run: bool, + jobs: Optional[str], + log: BoundLogger, + ): self.path = path.resolve() self.config = config - self.taskid = taskid - self.log = log + self.dry_run = dry_run + self.jobs = jobs + self.log = log.bind(subsystem="command") - def __call__(self, cmdname: str, args: dict[str, Any]): - func = getattr(self, cmdname) - ret = func(**args) - if not isinstance(ret, int): + async def __call__(self, cmdname: str, kwargs: dict[str, Any]): + self.log.debug("call", func=cmdname, func_args=kwargs) + try: + func = getattr(self, cmdname) + params = inspect.signature(func).parameters ret = 0 - self.log.debug("return-code", code=ret) - return ret - - def create_api_client(self): + if "repo" in params and params["repo"].annotation == Repository: + for repo in await self.get_repos(): + r = func(repo=repo, **kwargs) + if asyncio.iscoroutine(r): + r = await r + if not isinstance(r, int): + r = 0 + ret = max(ret, r) + elif ( + "repos" in params + and params["repos"].annotation == List[Repository] + ): + ret = func(repos=await self.get_repos(), **kwargs) + if asyncio.iscoroutine(ret): + ret = await ret + if not isinstance(ret, int): + ret = 0 + else: + assert ( + self.jobs is None + ), "This subcommand does not support --jobs/-a" + ret = func(**kwargs) + if asyncio.iscoroutine(ret): + ret = await ret + if not isinstance(ret, int): + ret = 0 + self.log.debug("return-code", code=ret) + return ret + except Exception: + self.log.exception("failed") + return 1 + + @cached_property + def source(self) -> CmdLineSource: + return CmdLineSource.load(self.path, self.log) + + @cached_property + def api(self): d = BackyDaemon(self.config, self.log) d._read_config() - return Client.from_conf( - "", d.api_cli_default, self.taskid, self.log - ) + taskid = self.log._context.get("taskid", generate_taskid()) + return Client.from_conf("", d.api_cli_default, taskid, self.log) - def init(self, type): - sourcefactory = backy.source.factory_by_type(type) - source = sourcefactory(*sourcefactory.argparse()) - # TODO: check if repo already exists - repo = Repository(self.path / "config", source, Schedule(), self.log) - repo.connect() - repo.store() - - def rev_parse(self, revision: str, uuid: bool) -> None: - b = Repository.load(self.path, self.log) - b.connect() - for r in b.find_revisions(revision): + async def get_repos(self) -> List[Repository]: + if self.jobs is None: + return [self.source.repository] + else: + jobs = await self.api.get_jobs() + assert len(jobs) > 0, "daemon has no configured job" + reg = re.compile(self.jobs) + res = [ + Repository( + Path(job["path"]), + Schedule.from_dict(job["schedule"]), + self.log, + ) + for job in jobs + if reg.search(job["name"]) + ] + assert len(res) > 0, "--jobs filter did not match" + for r in res: + r.connect() + return res + + # + # def init(self, type): + # sourcefactory = backy.source.factory_by_type(type) + # source = sourcefactory(*sourcefactory.argparse()) + # # TODO: check if repo already exists + # repo = Repository(self.path / "config", source, Schedule(), self.log) + # repo.connect() + # repo.store() + + def rev_parse(self, repo: Repository, revision: str, uuid: bool) -> None: + for rev in repo.find_revisions(revision): if uuid: - print(r.uuid) + print(rev.uuid) else: - print(r.info_filename) + 
print(rev.info_filename) - def log_(self, yaml_: bool, revision: str) -> None: - revs = Repository(self.path, self.log).find_revisions(revision) - if yaml_: - print(yaml.safe_dump([r.to_dict() for r in revs])) + def log_(self, repo: Repository, json_: bool, revision: str) -> None: + revs = repo.find_revisions(revision) + if json_: + print(BackyJSONEncoder().encode([r.to_dict() for r in revs])) return total_bytes = 0 @@ -170,84 +227,69 @@ def log_(self, yaml_: bool, revision: str) -> None: len(revs), humanize.naturalsize(total_bytes, binary=True) ) ) - pending_changes = sum(1 for r in revs if r.pending_changes) - if pending_changes: - rprint( - f"[yellow]{pending_changes} pending change(s)[/] " - "(Push changes with `backy push`)" - ) - def backup(self, tags: str, force: bool) -> int: - b = Repository(self.path, self.log) - b._clean() - tags_ = set(t.strip() for t in tags.split(",")) - if not force: - b.validate_tags(tags_) - r = Revision.create(b, tags_, self.log) - r.materialize() - proc = subprocess.run( - [ - b.type.value, - "-t", - self.taskid, - "-b", - str(self.path), - "backup", - r.uuid, - ], - ) - b.scan() - b._clean() - return proc.returncode + async def backup( + self, repos: List[Repository], bg: bool, tags: str, force: bool + ) -> int: + if len(repos) > 1: + bg = True + + if bg: + for repo in repos: + log = self.log.bind(job_name=repo.name) + try: + # TODO support tags + await self.api.run_job(repo.name) + log.info("triggered-run") + except ClientResponseError as e: + if e.status == HTTPNotFound.status_code: + log.error("unknown-job") + return 1 + raise + return 0 + else: + repo = repos[0] + assert ( + self.source.repository.path == repo.path + ), "only the current job is supported without --bg" + repo._clean() + tags_ = set(t.strip() for t in tags.split(",")) + if not force: + repo.validate_tags(tags_) + r = Revision.create(repo, tags_, self.log) + r.materialize() + try: + return self.source.backup(r) + finally: + repo._clean() def restore( - self, revision: str, target: str, restore_backend: RestoreBackend + self, + revision: str, + **restore_args: Any, ) -> int: - b = Repository(self.path, self.log) - r = b.find(revision) - proc = subprocess.run( - [ - b.type.value, - "-t", - self.taskid, - "-b", - str(self.path), - "restore", - "--backend", - restore_backend.value, - r.uuid, - target, - ] + r = self.source.repository.find(revision) + return self.source.restore( + r, self.source.restore_type.from_args(**restore_args) ) - return proc.returncode - def distrust(self, revision: str) -> None: - b = Repository(self.path, self.log) - b.distrust(revision) + def distrust(self, repo: Repository, revision: str) -> None: + repo.distrust(repo.find_revisions(revision)) def verify(self, revision: str) -> int: - b = Repository(self.path, self.log) - r = b.find(revision) - proc = subprocess.run( - [ - b.type.value, - "-t", - self.taskid, - "-b", - str(self.path), - "verify", - r.uuid, - ] - ) - return proc.returncode + # TODO support multiple repos + ret = 0 + for r in self.source.repository.find_revisions(revision): + ret = max(ret, self.source.verify(r)) + return ret - def rm(self, revision: str) -> None: - b = Repository(self.path, self.log) - b.forget(revision) + def rm(self, repo: Repository, revision: str) -> None: + repo.rm(repo.find_revisions(revision)) def tags( self, - action: Literal["set", "add", "remove"], + repo: Repository, + tag_action: Literal["set", "add", "remove"], autoremove: bool, expect: Optional[str], revision: str, @@ -259,9 +301,8 @@ def tags( expect_ = None else: 
expect_ = set(t.strip() for t in expect.split(",")) - b = backy.repository.Repository(self.path, self.log) - success = b.tags( - action, + success = repo.tags( + tag_action, revision, tags_, expect=expect_, @@ -270,33 +311,77 @@ def tags( ) return int(not success) - def gc(self, expire: bool) -> None: - # XXX needs to update from remote API peers first (pull) - b = Repository(self.path, self.log) + def gc(self, repo: Repository, expire: bool, local: bool) -> None: + if expire and not local: + assert False # request pull from daemon + # XXX needs to update from remote API peers first (pull) + assert self.source.repository.path == repo.path if expire: - b.expire() - proc = subprocess.run( - [ - b.type.value, - "-t", - self.taskid, - "-b", - str(self.path), - "gc", - ] - ) - # if remote: - # push + repo.expire() + if expire and not local: + assert False # request push from daemon + self.source.gc() + + def reports_list(self, repo: Repository): + for id in repo.report_ids: + print(id) + + def reports_show(self, repo: Repository, reports: Optional[str]): + if reports is None: + ids = repo.report_ids + else: + ids = reports.split(",") + for id in ids: + path = repo.report_path.joinpath(id).with_suffix(".report") + print(id) + print(path.read_text(encoding="utf-8")) + + def reports_delete(self, repo: Repository, reports: Optional[str]): + log = self.log.bind(job_name=repo.name) + if reports is None: + ids = repo.report_ids + else: + ids = reports.split(",") + for id in ids: + path = repo.report_path.joinpath(id).with_suffix(".report") + path.unlink() + log.info("report-deleted", id=id) + + def check(self, repo: Repository): + log = self.log.bind(job_name=repo.name) + exitcode = 0 + + manual_tags = set() + for rev in repo.history: + manual_tags |= filter_manual_tags(rev.tags) + if manual_tags: + log.info("check-manual-tags", manual_tags=", ".join(manual_tags)) + + unsynced_revs = {r for r in repo.history if r.pending_changes} + if unsynced_revs: + log.info("check-unsynced-revs", unsynced_revs=len(unsynced_revs)) + + if not repo.sla: + log.critical( + "check-sla-violation", + last_time=str( + repo.clean_history[-1].timestamp + if repo.clean_history + else None + ), + sla_overdue=repo.sla_overdue, + ) + exitcode = max(exitcode, 2) - def pull(self): - pass + if repo.report_ids: + log.warning("check-reports", reports=len(repo.report_ids)) + exitcode = max(exitcode, 1) - def check(self): - pass + return exitcode - def show_jobs(self, filter_re="") -> int: + async def show_jobs(self, repos: List[Repository]): """List status of all known jobs. Optionally filter by regex."""
- api = self.create_api_client() + repo_names = [r.name for r in repos] tz = format_datetime_local(None)[1] @@ -312,9 +397,11 @@ def show_jobs(self, filter_re="") -> int: "Next Tags", ) - jobs = asyncio.run(api.fetch_status(filter_re)) + jobs = await self.api.fetch_status(self.jobs) jobs.sort(key=lambda j: j["job"]) for job in jobs: + if job["job"] not in repo_names: + continue overdue = ( humanize.naturaldelta(job["sla_overdue"]) if job["sla_overdue"] @@ -339,22 +426,21 @@ def show_jobs(self, filter_re="") -> int: next_time, job["next_tags"], ) - backups = asyncio.run(api.list_backups()) - if filter_re: - backups = list(filter(re.compile(filter_re).search, backups)) + backups = await self.api.list_backups() + if self.jobs: + backups = list(filter(re.compile(self.jobs).search, backups)) for b in backups: t.add_row(b, "-", "-", "Dead", "-", "", "-", "-", "") rprint(t) print("{} jobs shown".format(len(jobs) + len(backups))) - def show_daemon(self): + async def show_daemon(self): """Show job status overview""" - api = self.create_api_client() t = Table("Status", "#") state_summary: Dict[str, int] = {} - jobs = asyncio.run(api.get_jobs()) - jobs += [{"status": "Dead"} for _ in asyncio.run(api.list_backups())] + jobs = await self.api.get_jobs() + jobs += [{"status": "Dead"} for _ in await self.api.list_backups()] for job in jobs: state_summary.setdefault(job["status"], 0) state_summary[job["status"]] += 1 @@ -363,60 +449,9 @@ def show_daemon(self): t.add_row(state, str(state_summary[state])) rprint(t) - def run(self, job: str): - """Trigger immediate run for one job""" - try: - self.api.run_job(job) - except ClientResponseError as e: - if e.status == HTTPNotFound.status_code: - self.log.error("unknown-job", job=job) - sys.exit(1) - raise - self.log.info("triggered-run", job=job) - - def runall(self): - """Trigger immediate run for all jobs""" - jobs = self.api.get_jobs() - for job in jobs: - self.run(job["name"]) - - def reload(self): + async def reload_daemon(self): """Reload the configuration.""" - self.log.info("reloading-daemon") - self.api.reload_daemon() - self.log.info("reloaded-daemon") - - def check(self): - status = self.api.fetch_status() - - exitcode = 0 - - for job in status: - log = self.log.bind(job_name=job["job"]) - if job["manual_tags"]: - log.info( - "check-manual-tags", - manual_tags=job["manual_tags"], - ) - if job["unsynced_revs"]: - self.log.info( - "check-unsynced-revs", unsynced_revs=job["unsynced_revs"] - ) - if job["sla"] != "OK": - log.critical( - "check-sla-violation", - last_time=str(job["last_time"]), - sla_overdue=job["sla_overdue"], - ) - exitcode = max(exitcode, 2) - if job["quarantine_reports"]: - log.warning( - "check-quarantined", reports=job["quarantine_reports"] - ) - exitcode = max(exitcode, 1) - - self.log.info("check-exit", exitcode=exitcode, jobs=len(status)) - raise SystemExit(exitcode) + await self.api.reload_daemon() def main(): @@ -433,8 +468,10 @@ def main(): default="/etc/backy.conf", help="(default: %(default)s)", ) + parser.add_argument( "-C", + dest="workdir", default=".", type=Path, help=( "Run as if backy was started in <path> instead of the current " "working directory." ), ) + parser.add_argument( + "-n", "--dry-run", action="store_true", help="Do not modify state." + ) + job_filter = parser.add_mutually_exclusive_group() + job_filter.add_argument( + "--jobs", + dest="jobs", + metavar="", + help="Optional job filter regex. Defaults to current workdir",
+ ) + + job_filter.add_argument( + "-a", + "--all", + action="store_const", + const=".*", + dest="jobs", + help="Shortcut to select all jobs", + ) + subparsers = parser.add_subparsers() + # TODO + # INIT p = subparsers.add_parser("init", help="Create an empty backy repository.") p.add_argument( "type", @@ -453,72 +512,7 @@ def main(): ) p.set_defaults(func="init") - p = subparsers.add_parser("show-jobs", help="List status of all known jobs") - p.add_argument( - "filter_re", - default="", - metavar="[filter]", - nargs="?", - help="Optional job filter regex", - ) - p.set_defaults(func="show_jobs") - - p = subparsers.add_parser("show-daemon", help="Show job status overview") - p.set_defaults(func="show_daemon") - - p = subparsers.add_parser( - "check", - help="Check whether all jobs adhere to their schedules' SLA", - ) - p.set_defaults(func="check") - - p = subparsers.add_parser( - "backup", - help="Perform a backup", - ) - p.add_argument( - "-f", "--force", action="store_true", help="Do not validate tags" - ) - p.add_argument("tags", help="Tags to apply to the backup") - p.set_defaults(func="backup") - - p = subparsers.add_parser( - "restore", - help="Restore (a given revision) to a given target", - ) - p.add_argument( - "--backend", - type=RestoreBackend, - choices=list(RestoreBackend), - default=RestoreBackend.AUTO, - dest="restore_backend", - help="(default: %(default)s)", - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="latest", - help="use revision SPEC as restore source (default: %(default)s)", - ) - p.add_argument( - "target", - metavar="TARGET", - help='Copy backed up revision to TARGET. Use stdout if TARGET is "-"', - ) - p.set_defaults(func="restore") - - p = subparsers.add_parser( - "gc", - help="Purge the backup store (i.e. chunked) from unused data", - ) - p.add_argument( - "--expire", - action="store_true", - help="Expire tags according to schedule", - ) - p.set_defaults(func="gc") - + # REV-PARSE p = subparsers.add_parser( "rev-parse", help="Print full path or uuid of specified revisions", @@ -532,16 +526,17 @@ def main(): "-r", "--revision", metavar="SPEC", - default="latest", + default="all", help="use revision SPEC to find (default: %(default)s)", ) p.set_defaults(func="rev_parse") + # LOG p = subparsers.add_parser( "log", help="Show backup status. Show inventory and summary information", ) - p.add_argument("--yaml", dest="yaml_", action="store_true") + p.add_argument("--json", dest="json_", action="store_true") p.add_argument( "-r", "--revision", @@ -551,6 +546,42 @@ def main(): ) p.set_defaults(func="log_") + # BACKUP + p = subparsers.add_parser( + "backup", + help="Perform a backup", + ) + p.add_argument( + "-f", "--force", action="store_true", help="Do not validate tags" + ) + p.add_argument( + "--bg", + action="store_true", + help="Let the daemon run the backup job. Implied if more than one job is selected.", + ) + p.add_argument("tags", help="Tags to apply to the backup") + p.set_defaults(func="backup") + + # RESTORE + p = subparsers.add_parser( + "restore", + help="Restore (a given revision) to a given target. 
The arguments vary for the different repo types.", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="latest", + help="use revision SPEC as restore source (default: %(default)s)", + ) + restore_subparsers = p.add_subparsers() + for source_type in SOURCE_PLUGINS: + source = source_type.load() + source.restore_type.setup_argparse( + restore_subparsers.add_parser(source.type_) + ) + p.set_defaults(func="restore") + # DISTRUST p = subparsers.add_parser( "distrust", @@ -565,6 +596,7 @@ def main(): ) p.set_defaults(func="distrust") + # VERIFY p = subparsers.add_parser( "verify", help="Verify specified revisions", @@ -578,19 +610,21 @@ def main(): ) p.set_defaults(func="verify") + # RM p = subparsers.add_parser( - "forget", - help="Forget specified revision", + "rm", + help="Remove specified revision", ) p.add_argument( "-r", "--revision", metavar="SPEC", required=True, - help="use revision SPEC to forget", + help="use revision SPEC to remove", ) - p.set_defaults(func="forget") + p.set_defaults(func="rm") + # TAGS p = subparsers.add_parser( "tags", help="Modify tags on revision", @@ -609,7 +643,7 @@ def main(): help="Do nothing if tags differ from the expected tags", ) p.add_argument( - "action", + "tag_action", choices=["set", "add", "remove"], ) p.add_argument( @@ -627,33 +661,98 @@ def main(): ) p.set_defaults(func="tags") + # GC + p = subparsers.add_parser( + "gc", + help="Purge the backup store (i.e. chunked) from unused data", + ) + p.add_argument( + "--expire", + action="store_true", + help="Expire tags according to schedule", + ) + p.add_argument( + "--local", + action="store_true", + help="Do not expire on remote servers", + ) + p.set_defaults(func="gc") + + # REPORTS-LIST + p = subparsers.add_parser("reports-list", help="List problem reports") + p.set_defaults(func="reports_list") + + # REPORTS-SHOW + p = subparsers.add_parser("reports-show", help="Show problem report") + p.add_argument( + "reports", + nargs="?", + metavar="", + help="comma separated list of report uuids", + ) + + p.set_defaults(func="reports_show") + + # REPORTS-DELETE + p = subparsers.add_parser("reports-delete", help="Delete problem report") + report_sel = p.add_mutually_exclusive_group(required=True) + report_sel.add_argument( + "reports", + nargs="?", + metavar="", + help="comma separated list of report uuids", + ) + report_sel.add_argument( + "--all-reports", + action="store_const", + const=None, + dest="reports", + help="Select all reports", + ) + p.set_defaults(func="reports_delete") + + # CHECK + p = subparsers.add_parser( + "check", + help="Check whether the selected jobs adhere to their schedules' SLA", + ) + p.set_defaults(func="check") + + # TODO: job filter default + # SHOW JOBS + p = subparsers.add_parser("show-jobs", help="List status of all known jobs") + p.set_defaults(func="show_jobs") + + # SHOW DAEMON + p = subparsers.add_parser("show-daemon", help="Show job status overview") + p.set_defaults(func="show_daemon") + + # RELOAD DAEMON + p = subparsers.add_parser("reload-daemon", help="Reload daemon config") + p.set_defaults(func="reload_daemon") + args = parser.parse_args() if not hasattr(args, "func"): parser.print_usage() sys.exit(0) - task_id = generate_taskid() - # Logging - logging.init_logging(args.verbose, defaults={"taskid": task_id}) + logging.init_logging(args.verbose, defaults={"taskid": generate_taskid()}) log = structlog.stdlib.get_logger(subsystem="command") log.debug("invoked", args=" ".join(sys.argv)) - command = Command(args.C, args.config, task_id, log) + 
command = Command(args.workdir, args.config, args.dry_run, args.jobs, log) func = args.func # Pass over to function func_args = dict(args._get_kwargs()) del func_args["func"] del func_args["verbose"] + del func_args["workdir"] del func_args["config"] - del func_args["C"] - - try: - log.debug("parsed", func=args.func, func_args=func_args) - sys.exit(command(func, func_args)) - except Exception: - log.exception("failed") - sys.exit(1) + del func_args["dry_run"] + del func_args["jobs"] + + sys.exit(asyncio.run(command(func, func_args))) diff --git a/src/backy/cli/tests/test_client.py b/src/backy/cli/tests/test_client.py index 41debfff..6dd8dcae 100644 --- a/src/backy/cli/tests/test_client.py +++ b/src/backy/cli/tests/test_client.py @@ -14,8 +14,13 @@ @pytest.fixture -def log(log): - return log.bind(job_name="-") +async def daemon(tmp_path, monkeypatch, log): + # FIXME + from backy.daemon.tests.test_daemon import daemon + + gen = daemon.__pytest_wrapped__.obj(tmp_path, monkeypatch, log) + async for i in gen: + yield i @pytest.fixture @@ -67,12 +72,15 @@ async def api_client(api, aiohttp_client, log): @pytest.fixture -async def cli_client(api_client, log): - return Command(api_client, log) +async def command(tmp_path, api_client, log): + cmd = Command(tmp_path, tmp_path / "config", False, ".*", log) + cmd.api = api_client + return cmd -async def test_cli_jobs(cli_client, capsys): - await cli_client.jobs() +async def test_show_jobs(command, capsys): + exitcode = await command("show_jobs", {}) + assert exitcode == 0 out, err = capsys.readouterr() assert ( Ellipsis( @@ -96,7 +104,9 @@ async def test_cli_jobs(cli_client, capsys): == out ) - await cli_client.jobs(filter_re="test01") + command.jobs = "test01" + exitcode = await command("show_jobs", {}) + assert exitcode == 0 out, err = capsys.readouterr() assert ( Ellipsis( @@ -116,26 +126,15 @@ async def test_cli_jobs(cli_client, capsys): == out ) - await cli_client.jobs(filter_re="asdf") - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -┏━━━━━┳━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ -┃ ┃ ┃ ┃ ┃ Last ┃ ┃ ┃ Next ┃ ┃ -┃ ┃ ┃ SLA ┃ ┃ Backup ┃ Last ┃ Last ┃ Backup ┃ Next ┃ -┃ Job ┃ SLA ┃ overdue ┃ Status ┃ ... ┃ Tags ┃ Durat… ┃ ... ┃ Tags ┃ -┡━━━━━╇━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ -└─────┴─────┴─────────┴────────┴─────────┴─────────┴────────┴─────────┴────────┘ -0 jobs shown -""" - ) - == out - ) + command.jobs = "asdf" + exitcode = await command("show_jobs", {}) + assert exitcode == 1 -async def test_cli_status(cli_client, capsys): - await cli_client.status() +async def test_show_daemon(command, capsys): + command.jobs = None + exitcode = await command("show_daemon", {}) + assert exitcode == 0 out, err = capsys.readouterr() assert ( """\ @@ -150,126 +149,134 @@ async def test_cli_status(cli_client, capsys): ) -async def test_cli_run(daemon, cli_client, monkeypatch): +async def test_backup_bg(daemon, command, monkeypatch): utils.log_data = "" run = mock.Mock() monkeypatch.setattr(daemon.jobs["test01"].run_immediately, "set", run) - await cli_client.run("test01") + command.jobs = "test01" + exitcode = await command( + "backup", {"bg": True, "tags": "manual:a", "force": False} + ) + assert exitcode == 0 run.assert_called_once() assert ( Ellipsis( """\ +... D - command/call func='backup' func_args={'bg': True, 'tags': 'manual:a', 'force': False} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ +... 
D ~cli[ABCD] api/request-result response=... status_code=200 +... D test01 repo/scan-reports entries=0 ... D ~[ABCD] api/new-conn path='/v1/jobs/test01/run' query='' ... I ~cli[ABCD] api/get-job name='test01' ... I ~cli[ABCD] api/run-job name='test01' ... D ~cli[ABCD] api/request-result status_code=202 -... I - CLIClient/triggered-run job='test01' +... I test01 command/triggered-run \n\ +... D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_run_missing(daemon, cli_client): +async def test_backup_bg_missing(daemon, command): utils.log_data = "" - try: - await cli_client.run("aaaa") - except SystemExit as e: - assert e.code == 1 - - assert ( - Ellipsis( - """\ -... D ~[ABCD] api/new-conn path='/v1/jobs/aaaa/run' query='' -... I ~cli[ABCD] api/get-job name='aaaa' -... I ~cli[ABCD] api/get-job-not-found name='aaaa' -... D ~cli[ABCD] api/request-result status_code=404 -... E - CLIClient/unknown-job job='aaaa' -""" - ) - == utils.log_data + command.jobs = "aaaa" + exitcode = await command( + "backup", {"bg": True, "tags": "manual:a", "force": False} ) + assert exitcode == 1 -async def test_cli_runall(daemon, cli_client, monkeypatch): +async def test_backup_bg_all(daemon, command, monkeypatch): utils.log_data = "" run1 = mock.Mock() run2 = mock.Mock() monkeypatch.setattr(daemon.jobs["test01"].run_immediately, "set", run1) monkeypatch.setattr(daemon.jobs["foo00"].run_immediately, "set", run2) - await cli_client.runall() + exitcode = await command( + "backup", {"bg": True, "tags": "manual:a", "force": False} + ) + assert exitcode == 0 run1.assert_called_once() run2.assert_called_once() assert ( Ellipsis( """\ +... D - command/call func='backup' func_args={'bg': True, 'tags': 'manual:a', 'force': False} ... D ~[ABCD] api/new-conn path='/v1/jobs' query='' ... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 ... D ~[ABCD] api/new-conn path='/v1/jobs/test01/run' query='' ... I ~cli[ABCD] api/get-job name='test01' ... I ~cli[ABCD] api/run-job name='test01' ... D ~cli[ABCD] api/request-result status_code=202 -... I - CLIClient/triggered-run job='test01' +... I test01 command/triggered-run \n\ ... D ~[ABCD] api/new-conn path='/v1/jobs/foo00/run' query='' ... I ~cli[ABCD] api/get-job name='foo00' ... I ~cli[ABCD] api/run-job name='foo00' ... D ~cli[ABCD] api/request-result status_code=202 -... I - CLIClient/triggered-run job='foo00' +... I foo00 command/triggered-run \n\ +... D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_reload(daemon, cli_client, monkeypatch): +async def test_reload(daemon, command, monkeypatch): utils.log_data = "" reload = mock.Mock() monkeypatch.setattr(daemon, "reload", reload) - await cli_client.reload() + command.jobs = None + exitcode = await command("reload_daemon", {}) + assert exitcode == 0 reload.assert_called_once() assert ( Ellipsis( """\ -... I - CLIClient/reloading-daemon \n\ +... D - command/call func='reload_daemon' func_args={} ... D ~[ABCD] api/new-conn path='/v1/reload' query='' ... I ~cli[ABCD] api/reload-daemon \n\ ... D ~cli[ABCD] api/request-result status_code=204 -... I - CLIClient/reloaded-daemon \n\ +... 
D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_check_ok(daemon, cli_client): +async def test_check_ok(daemon, command): utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 0 + exitcode = await command("check", {}) + assert exitcode == 0 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... I - CLIClient/check-exit exitcode=0 jobs=2 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 +... D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_check_too_old(daemon, clock, cli_client, log): +async def test_check_too_old(daemon, clock, command, log): job = daemon.jobs["test01"] revision = Revision.create(job.repository, set(), log) revision.timestamp = utils.now() - datetime.timedelta(hours=48) @@ -277,66 +284,69 @@ async def test_cli_check_too_old(daemon, clock, cli_client, log): revision.materialize() utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 2 + exitcode = await command("check", {}) + assert exitcode == 2 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... C test01 CLIClient/check-sla-violation last_time='2015-08-30 07:06:47+00:00' sla_overdue=172800.0 -... I - CLIClient/check-exit exitcode=2 jobs=2 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 +... C test01 command/check-sla-violation last_time='2015-08-30 07:06:47+00:00' sla_overdue=172800.0 +... D - command/return-code code=2 """ ) == utils.log_data ) -async def test_cli_check_manual_tags(daemon, cli_client, log): +async def test_check_manual_tags(daemon, command, log): job = daemon.jobs["test01"] revision = Revision.create(job.repository, {"manual:test"}, log) revision.stats["duration"] = 60.0 revision.materialize() utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 0 + exitcode = await command("check", {}) + assert exitcode == 0 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... I test01 CLIClient/check-manual-tags manual_tags='manual:test' -... I - CLIClient/check-exit exitcode=0 jobs=2 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 +... I test01 command/check-manual-tags manual_tags='manual:test' +... 
D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_check_quarantine(daemon, cli_client, log): +async def test_check_quarantine(daemon, command, log): job = daemon.jobs["test01"] - job.repository.quarantine.add_report(ChunkMismatchReport(b"a", b"b", 0)) + job.repository.add_report(ChunkMismatchReport(b"a", b"b", 0)) utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 1 + exitcode = await command("check", {}) + assert exitcode == 1 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... W test01 CLIClient/check-quarantined reports=1 -... I - CLIClient/check-exit exitcode=1 jobs=2 +... D test01 repo/scan-reports entries=1 +... D foo00 repo/scan-reports entries=0 +... W test01 command/check-reports reports=1 +... D - command/return-code code=1 """ ) == utils.log_data diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py index 90598a22..0e365b07 100644 --- a/src/backy/cli/tests/test_main.py +++ b/src/backy/cli/tests/test_main.py @@ -1,27 +1,20 @@ import datetime import os -import pprint -import sys -from functools import partialmethod +from dataclasses import dataclass +from pathlib import Path +from unittest.mock import create_autospec import pytest import backy.cli import backy.repository +import backy.source from backy import utils +from backy.repository import Repository from backy.revision import Revision from backy.tests import Ellipsis -@pytest.fixture -def argv(): - original = sys.argv - new = original[:1] - sys.argv = new - yield new - sys.argv = original - - def test_display_usage(capsys, argv): with pytest.raises(SystemExit) as exit: backy.cli.main() @@ -29,9 +22,9 @@ def test_display_usage(capsys, argv): out, err = capsys.readouterr() assert ( """\ -usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] - {client,backup,restore,purge,find,status,\ -upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} +usage: pytest [-h] [-v] [-c CONFIG] [-C WORKDIR] [-n] + [--jobs | -a] + {init,rev-parse,log,backup,restore,distrust,verify,rm,tags,gc,reports-list,reports-show,reports-delete,check,show-jobs,show-daemon,reload-daemon} ... """ == out @@ -39,22 +32,6 @@ def test_display_usage(capsys, argv): assert err == "" -def test_display_client_usage(capsys, argv): - argv.append("client") - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - """\ -usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] - {jobs,status,run,runall,reload,check} ... -""" - == out - ) - assert err == "" - - def test_display_help(capsys, argv): argv.append("--help") with pytest.raises(SystemExit) as exit: @@ -64,33 +41,12 @@ def test_display_help(capsys, argv): assert ( Ellipsis( """\ -usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] - {client,backup,restore,purge,find,status,\ -upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} +usage: pytest [-h] [-v] [-c CONFIG] [-C WORKDIR] [-n] + [--jobs | -a] + {init,rev-parse,log,backup,restore,distrust,verify,rm,tags,gc,reports-list,reports-show,reports-delete,check,show-jobs,show-daemon,reload-daemon} ... -Backup and restore for block devices. 
- -positional arguments: -... -""" - ) - == out - ) - assert err == "" - - -def test_display_client_help(capsys, argv): - argv.extend(["client", "--help"]) - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] - {jobs,status,run,runall,reload,check} ... +Backy command line client. positional arguments: ... @@ -101,63 +57,215 @@ def test_display_client_help(capsys, argv): assert err == "" -def test_verbose_logging(capsys, argv): - # This is just a smoke test to ensure the appropriate code path - # for -v is covered. - argv.extend(["-v"]) - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - - -def print_args(*args, return_value=None, **kw): - print(args) - pprint.pprint(kw) - return return_value - - -async def async_print_args(*args, **kw): - print_args(*args, **kw) - +@dataclass +class Instance: + cls: type -def test_call_status(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.cli.Command, "status", print_args) - argv.extend(["-v", "-b", str(backup.path), "status"]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{'revision': 'all', 'yaml_': False} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D command/invoked args='... -v -b ... status' -... D command/parsed func='status' func_args={'yaml_': False, 'revision': 'all'} -... D command/successful \n\ -""" - ) - == utils.log_data - ) + def __eq__(self, other): + return isinstance(other, self.cls) -@pytest.mark.parametrize("success", [False, True]) -def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): - os.makedirs(tmp_path / "backy") - os.chdir(tmp_path / "backy") +@pytest.mark.parametrize( + ["fun", "args", "rv", "rc", "params"], + [ + ( + "rev_parse", + ["rev-parse", "-r", "1"], + None, + 0, + {"repo": Instance(Repository), "revision": "1", "uuid": False}, + ), + ( + "log_", + ["log"], + None, + 0, + {"repo": Instance(Repository), "json_": False, "revision": "all"}, + ), + ( + "backup", + ["backup", "manual:test"], + 0, + 0, + { + "repos": [Instance(Repository)], + "bg": False, + "tags": "manual:test", + "force": False, + }, + ), + ( + "backup", + ["backup", "--force", "--bg", "manual:test"], + 1, + 1, + { + "repos": [Instance(Repository)], + "bg": True, + "tags": "manual:test", + "force": True, + }, + ), + ( + "restore", + ["restore", "-r", "1", "file", "out.bin"], + None, + 0, + {"revision": "1", "target": Path("out.bin")}, + ), + ( + "distrust", + ["distrust", "-r", "1"], + None, + 0, + {"repo": Instance(Repository), "revision": "1"}, + ), + ( + "verify", + ["verify", "-r", "1"], + None, + 0, + {"revision": "1"}, + ), + ( + "rm", + ["rm", "-r", "1"], + None, + 0, + {"repo": Instance(Repository), "revision": "1"}, + ), + ( + "tags", + ["tags", "set", "-r", "last", "manual:a"], + None, + 0, + { + "repo": Instance(Repository), + "tag_action": "set", + "autoremove": False, + "expect": None, + "revision": "last", + "tags": "manual:a", + "force": False, + }, + ), + ( + "tags", + [ + "tags", + "remove", + "-r", + "last", + "--autoremove", + "--expect", + "manual:b", + "manual:a", + ], + None, + 0, + { + "repo": Instance(Repository), + "tag_action": "remove", + "autoremove": True, + "expect": "manual:b", + "revision": "last", + "tags": "manual:a", + "force": 
False, + }, + ), + ( + "tags", + ["tags", "add", "-r", "last", "--force", "manual:a"], + None, + 0, + { + "repo": Instance(Repository), + "tag_action": "add", + "autoremove": False, + "expect": None, + "revision": "last", + "tags": "manual:a", + "force": True, + }, + ), + ( + "gc", + ["gc", "--expire"], + None, + 0, + {"repo": Instance(Repository), "expire": True, "local": False}, + ), + ( + "reports_list", + ["reports-list"], + None, + 0, + {"repo": Instance(Repository)}, + ), + ( + "reports_show", + ["reports-show"], + None, + 0, + {"repo": Instance(Repository), "reports": None}, + ), + ( + "reports_delete", + ["reports-delete", "--all-reports"], + None, + 0, + {"repo": Instance(Repository), "reports": None}, + ), + ( + "check", + ["check"], + None, + 0, + {"repo": Instance(Repository)}, + ), + ( + "show_jobs", + ["show-jobs"], + None, + 0, + {"repos": [Instance(Repository)]}, + ), + ( + "show_daemon", + ["show-daemon"], + None, + 0, + {}, + ), + ( + "reload_daemon", + ["reload-daemon"], + None, + 0, + {}, + ), + ], +) +def test_call_fun( + fun, + args, + rv, + rc, + params, + argv, + tmp_path, + monkeypatch, + log, +): + path = tmp_path / "test00" + path.mkdir() + os.chdir(path) - with open(tmp_path / "backy" / "config", "w", encoding="utf-8") as f: + with open(path / "config", "w", encoding="utf-8") as f: f.write( f""" --- +path: "{path}" schedule: daily: interval: 1d @@ -168,300 +276,45 @@ def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): """ ) - monkeypatch.setattr( - backy.repository.Repository, - "backup", - partialmethod(print_args, return_value=success), - ) - argv.extend(["-v", "backup", "manual:test"]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.cli.main() - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(, {'manual:test'}, False) -{} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D command/invoked args='... -v backup manual:test' -... D command/parsed func='backup' func_args={{'force': False, 'tags': 'manual:test'}} -... D quarantine/scan entries=0 -... D command/return-code code={int(not success)} -""" - ) - == utils.log_data - ) - assert exit.value.code == int(not success) - - -def test_call_find(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.cli.Command, "find", print_args) - argv.extend(["-v", "-b", str(backup.path), "find", "-r", "1"]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{'revision': '1', 'uuid': False} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D command/invoked args='... -v -b ... find -r 1' -... D command/parsed func='find' func_args={'uuid': False, 'revision': '1'} -... 
D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -@pytest.mark.parametrize( - ["action", "args"], - [ - ("jobs", {"filter_re": "test"}), - ("status", dict()), - ("run", {"job": "test"}), - ("runall", dict()), - ("reload", dict()), - ("check", dict()), - ], -) -def test_call_client( - capsys, backup, argv, monkeypatch, log, tmp_path, action, args -): - monkeypatch.setattr(backy.cli.client.CLIClient, action, async_print_args) - conf = str(tmp_path / "conf") - with open(conf, "w") as c: - c.write( - f"""\ -global: - base-dir: {str(tmp_path)} -api: - addrs: "127.0.0.1, ::1" - port: 1234 - cli-default: - token: "test" - -schedules: {{}} -jobs: {{}} -""" - ) - - argv.extend(["-v", "client", "-c", conf, action, *args.values()]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - f"""\ -(,) -{args} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D command/invoked args='... -v client -c ... {action}{" "*bool(args)}{", ".join(args.values())}' -... D command/parsed func='client' func_args={{'config': PosixPath('...'), 'peer': None, \ -'url': None, 'token': None{", "*bool(args)}{str(args)[1:-1]}, 'apifunc': '{action}'}} -... D daemon/read-config ... -... D command/return-code code=0 -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -def test_call_scheduler(capsys, backup, argv, monkeypatch, tmp_path): - monkeypatch.setattr(backy.cli.Command, "scheduler", print_args) - argv.extend( - [ - "-v", - "-b", - str(backup.path), - "-l", - str(tmp_path / "backy.log"), - "scheduler", - ] - ) + mock = create_autospec(getattr(backy.cli.Command, fun), return_value=rv) + monkeypatch.setattr(backy.cli.Command, fun, mock) + argv.extend(["-v", "-C", str(path), *args]) utils.log_data = "" with pytest.raises(SystemExit) as exit: backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{'config': PosixPath('/etc/backy.conf')} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D command/invoked args='... -v -b ... scheduler' -... D command/parsed func='scheduler' func_args={'config': PosixPath('/etc/backy.conf')} -... D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -@pytest.mark.parametrize("action", ["set", "add", "remove"]) -def test_call_tags(capsys, backup, argv, monkeypatch, action): - monkeypatch.setattr(backy.cli.Command, "tags", print_args) - argv.extend( - ["-v", "-b", str(backup.path), "tags", action, "-r", "last", "manual:a"] - ) - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - f"""\ -(,) -{{'action': '{action}', - 'autoremove': False, - 'expect': None, - 'force': False, - 'revision': 'last', - 'tags': 'manual:a'}} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D quarantine/scan entries=0 -... D command/invoked args='... -v -b ... tags {action} -r last manual:a' -... D command/parsed func='tags' func_args={{'autoremove': False, 'force': False, 'expect': None, \ -'action': '{action}', 'revision': 'last', 'tags': 'manual:a'}} -... 
D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -def test_call_expire(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.cli.Command, "expire", print_args) - argv.extend(["-v", "-b", str(backup.path), "expire"]) - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D quarantine/scan entries=0 -... D command/invoked args='... -v -b ... expire' -... D command/parsed func='expire' func_args={} -... D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - + assert exit.value.code == rc + mock.assert_called_once() + assert (Instance(backy.cli.Command),) == mock.call_args.args + assert params == mock.call_args.kwargs -@pytest.mark.parametrize("action", ["pull", "push"]) -def test_call_pull_push(capsys, backup, argv, monkeypatch, tmp_path, action): - monkeypatch.setattr(backy.cli.Command, action, print_args) - conf = tmp_path / "conf" - with open(conf, "w") as c: - c.write( - f"""\ -global: - base-dir: {str(tmp_path)} -api: - addrs: "127.0.0.1, ::1" - port: 1234 - cli-default: - token: "test" -peers : {{}} -schedules: {{}} -jobs: {{}} -""" - ) + expected = "" + expected += f"... D - command/invoked args='... -v -C ... {' '.join(args)}'\n" + expected += f"... D - command/call func='{fun}' func_args=...\n" + if "repo" in params or "repos" in params: + expected += "... D test00 repo/scan-reports entries=0\n" + expected += f"... D - command/return-code code={rc}\n" - argv.extend(["-v", "-b", str(backup.path), action, "-c", str(conf)]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.cli.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - f"""\ -(,) -{{'config': {repr(conf)}}} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D command/invoked args='... -v -b {backup.path} {action} -c {conf}' -... D command/parsed func='{action}' func_args={{'config': {repr(conf)}}} -... D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 + assert Ellipsis(expected) == utils.log_data def test_call_unexpected_exception( - capsys, backup, argv, monkeypatch, log, tmp_path + capsys, repository, argv, monkeypatch, log, tmp_path ): def do_raise(*args, **kw): raise RuntimeError("test") - monkeypatch.setattr(backy.cli.Command, "status", do_raise) + monkeypatch.setattr(backy.cli.Command, "log_", do_raise) import os monkeypatch.setattr(os, "_exit", lambda x: None) argv.extend( - ["-l", str(tmp_path / "backy.log"), "-b", str(backup.path), "status"] + [ + "-v", + "-C", + str(repository.path), + "log", + ] ) utils.log_data = "" with pytest.raises(SystemExit): @@ -471,12 +324,12 @@ def do_raise(*args, **kw): assert ( Ellipsis( """\ -... D command/invoked args='... -l ... -b ... status' -... D command/parsed func='status' func_args={'yaml_': False, 'revision': 'all'} -... E command/failed exception_class='builtins.RuntimeError' exception_msg='test' +... D - command/invoked args='... -v -C ... log' +... D - command/call func='log_' func_args=... +... 
E - command/failed exception_class='builtins.RuntimeError' exception_msg='test' exception>\tTraceback (most recent call last): -exception>\t File ".../src/backy/cli/__init__.py", line ..., in main -exception>\t ret = func(**func_args) +exception>\t File ".../src/backy/cli/__init__.py", line ..., in __call__ +exception>\t ret = func(**kwargs) exception>\t File ".../src/backy/cli/tests/test_main.py", line ..., in do_raise exception>\t raise RuntimeError("test") exception>\tRuntimeError: test @@ -487,26 +340,29 @@ def do_raise(*args, **kw): def test_commands_wrapper_status( - backup, tmp_path, capsys, clock, tz_berlin, log + repository, tmp_path, capsys, clock, tz_berlin, log ): - commands = backy.cli.Command(tmp_path, "AAAA", log) + commands = backy.cli.Command( + tmp_path, tmp_path / "config", False, ".*", log + ) - revision1 = Revision.create(backup, {"daily"}, log, uuid="1") + revision1 = Revision.create(repository, {"daily"}, log, uuid="1") revision1.materialize() - revision2 = Revision.create(backup, {"daily"}, log, uuid="2") + revision2 = Revision.create(repository, {"daily"}, log, uuid="2") revision2.timestamp = backy.utils.now() + datetime.timedelta(hours=1) revision2.server = "remote" revision2.orig_tags = {"daily"} revision2.materialize() - revision3 = Revision.create(backup, {"new", "same"}, log, uuid="3") + revision3 = Revision.create(repository, {"new", "same"}, log, uuid="3") revision3.timestamp = backy.utils.now() + datetime.timedelta(hours=2) revision3.server = "remote" revision3.orig_tags = {"old", "same"} revision3.materialize() - commands.status(yaml_=False, revision="all") + repository.connect() + commands.log_(repository, json_=False, revision="all") out, err = capsys.readouterr() assert err == "" @@ -524,38 +380,38 @@ def test_commands_wrapper_status( │ 11:06:47 │ │ │ │ │ │ │ └────────────────┴────┴─────────┴──────────┴────────────────┴─────────┴────────┘ 3 revisions containing 0 Bytes data (estimated) -1 pending change(s) (Push changes with `backy push`) """ ) -def test_commands_wrapper_status_yaml( - backup, tmp_path, capsys, clock, tz_berlin, log +def test_commands_wrapper_status_json( + repository, tmp_path, capsys, clock, tz_berlin, log ): - commands = backy.cli.Command(tmp_path, "AAAA", log) + commands = backy.cli.Command( + tmp_path, tmp_path / "config", False, ".*", log + ) - revision = Revision.create(backup, set(), log, uuid="1") + revision = Revision.create(repository, set(), log, uuid="1") revision.stats["duration"] = 3.5 revision.stats["bytes_written"] = 42 revision.materialize() - commands.status(yaml_=True, revision="all") + repository.connect() + commands.log_(repository, json_=True, revision="all") out, err = capsys.readouterr() assert err == "" assert ( out - == f"""\ -- orig_tags: [] - parent: '' - server: '' - stats: - bytes_written: 42 - duration: 3.5 - tags: [] - timestamp: 2015-09-01 07:06:47+00:00 - trust: trusted - uuid: '1' - + == """\ +[{\ +"uuid": "1", \ +"timestamp": "2015-09-01T07:06:47+00:00", \ +"parent": "", "stats": {"bytes_written": 42, "duration": 3.5}, \ +"trust": "trusted", \ +"tags": [], \ +"orig_tags": [], \ +"server": ""\ +}] """ ) diff --git a/src/backy/conftest.py b/src/backy/conftest.py index 80242b96..5f9b9026 100644 --- a/src/backy/conftest.py +++ b/src/backy/conftest.py @@ -1,6 +1,7 @@ import datetime import os import random +import sys from unittest import mock from zoneinfo import ZoneInfo @@ -125,3 +126,12 @@ async def async_invoke(self, *args): monkeypatch.setattr(backy.source.CmdLineSource, "invoke", sync_invoke) 
monkeypatch.setattr(backy.source.AsyncCmdLineSource, "invoke", async_invoke) + + +@pytest.fixture +def argv(): + original = sys.argv + new = original[:1] + sys.argv = new + yield new + sys.argv = original diff --git a/src/backy/daemon/__init__.py b/src/backy/daemon/__init__.py index 225b3235..34ba8799 100644 --- a/src/backy/daemon/__init__.py +++ b/src/backy/daemon/__init__.py @@ -350,8 +350,8 @@ def status( result.append( dict( job=job.name, - sla="OK" if job.sla else "TOO OLD", - sla_overdue=job.sla_overdue, + sla="OK" if job.repository.sla else "TOO OLD", + sla_overdue=job.repository.sla_overdue, status=job.status, last_time=history[-1].timestamp if history else None, last_tags=( diff --git a/src/backy/daemon/api.py b/src/backy/daemon/api.py index 87b5cc02..f20afcde 100644 --- a/src/backy/daemon/api.py +++ b/src/backy/daemon/api.py @@ -1,7 +1,6 @@ import datetime import re from asyncio import get_running_loop -from json import JSONEncoder from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple import aiohttp @@ -22,7 +21,7 @@ import backy.repository from backy.repository import Repository, StatusDict from backy.revision import Revision -from backy.utils import generate_taskid +from backy.utils import BackyJSONEncoder, generate_taskid if TYPE_CHECKING: from backy.daemon import BackyDaemon @@ -30,16 +29,6 @@ from .scheduler import Job -class BackyJSONEncoder(JSONEncoder): - def default(self, o: Any) -> Any: - if hasattr(o, "to_dict"): - return o.to_dict() - elif isinstance(o, datetime.datetime): - return o.isoformat() - else: - super().default(o) - - def to_json(response: Any) -> aiohttp.web.StreamResponse: if response is None: raise web.HTTPNoContent() diff --git a/src/backy/daemon/scheduler.py b/src/backy/daemon/scheduler.py index 2e15aee8..9fea1d6a 100644 --- a/src/backy/daemon/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -75,29 +75,6 @@ def spread(self) -> int: generator.seed(seed) return generator.randint(0, limit) - @property - def sla(self) -> bool: - """Is the SLA currently held? - - The SLA being held is only reflecting the current status. - - It does not help to reflect on past situations that have failed as - those are not indicators whether and admin needs to do something - right now. 
- """ - return not self.sla_overdue - - @property - def sla_overdue(self) -> int: - """Amount of time the SLA is currently overdue.""" - if not self.repository.clean_history: - return 0 - age = backy.utils.now() - self.repository.clean_history[-1].timestamp - max_age = min(x["interval"] for x in self.schedule.schedule.values()) - if age > max_age * 1.5: - return age.total_seconds() - return 0 - @property def schedule(self) -> Schedule: return self.repository.schedule @@ -113,8 +90,9 @@ def update_status(self, status: str) -> None: def to_dict(self) -> dict: return { "name": self.name, + "path": self.path, "status": self.status, - "source": self.source, + # "source": self.source, "schedule": self.schedule.to_dict(), } diff --git a/src/backy/daemon/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py index 257060b7..8f39e210 100644 --- a/src/backy/daemon/tests/test_daemon.py +++ b/src/backy/daemon/tests/test_daemon.py @@ -6,10 +6,12 @@ import signal from pathlib import Path from unittest import mock +from unittest.mock import Mock import pytest import yaml +import backy.daemon from backy import utils from backy.daemon import BackyDaemon from backy.daemon.scheduler import Job @@ -18,6 +20,49 @@ from backy.tests import Ellipsis +def test_display_help(capsys, argv): + argv.append("--help") + with pytest.raises(SystemExit) as exit: + backy.daemon.main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + Ellipsis( + """\ +usage: pytest [-h] [-v] [-l LOGFILE] [-c CONFIG] + +Backy daemon - runs the scheduler and API. + +options: +... +""" + ) + == out + ) + assert err == "" + + +async def test_main(tmp_path, argv, monkeypatch): + mock = Mock() + monkeypatch.setattr(backy.daemon.BackyDaemon, "start", mock) + monkeypatch.setattr(backy.daemon.BackyDaemon, "api_server", mock) + monkeypatch.setattr(backy.daemon.BackyDaemon, "run_forever", mock) + argv.extend( + ["-v", "-l", str(tmp_path / "log"), "-c", str(tmp_path / "conf")] + ) + utils.log_data = "" + + backy.daemon.main() + + assert mock.call_count == 3 + assert ( + Ellipsis( + "... D - command/invoked args='... -v -l ... -c ...\n" + ) + == utils.log_data + ) + + @pytest.fixture async def daemon(tmp_path, monkeypatch, log): daemon = BackyDaemon(tmp_path / "config", log) @@ -204,7 +249,7 @@ def test_sla_before_first_backup(daemon): # think of something when this happens. Maybe keeping a log of errors # or so to notice that we tried previously. assert len(job.repository.history) == 0 - assert job.sla is True + assert job.repository.sla is True def test_sla_over_time(daemon, clock, tmp_path, log): @@ -220,25 +265,25 @@ def test_sla_over_time(daemon, clock, tmp_path, log): revision.materialize() job.repository.scan() assert len(job.repository.history) == 1 - assert job.sla is True + assert job.repository.sla is True # 24 hours is also fine. revision.timestamp = utils.now() - datetime.timedelta(hours=24) revision.write_info() job.repository.scan() - assert job.sla is True + assert job.repository.sla is True # 32 hours is also fine. revision.timestamp = utils.now() - datetime.timedelta(hours=32) revision.write_info() job.repository.scan() - assert job.sla is True + assert job.repository.sla is True # 24*1.5 hours is the last time that is OK. revision.timestamp = utils.now() - datetime.timedelta(hours=24 * 1.5) revision.write_info() job.repository.scan() - assert job.sla is True + assert job.repository.sla is True # 1 second later we consider this not to be good any longer. 
revision.timestamp = ( @@ -248,13 +293,13 @@ def test_sla_over_time(daemon, clock, tmp_path, log): ) revision.write_info() job.repository.scan() - assert job.sla is False + assert job.repository.sla is False # a running backup does not influence this. job.update_status("running (slow)") r = Revision.create(job.repository, {"daily"}, log) r.write_info() - assert job.sla is False + assert job.repository.sla is False def test_incomplete_revs_dont_count_for_sla(daemon, clock, tmp_path, log): @@ -267,7 +312,7 @@ def test_incomplete_revs_dont_count_for_sla(daemon, clock, tmp_path, log): r2.timestamp = utils.now() - datetime.timedelta(hours=1) r2.materialize() job.repository.scan() - assert False is job.sla + assert False is job.repository.sla def test_update_status(daemon, log): diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py index afeff307..768c0cb8 100644 --- a/src/backy/file/__init__.py +++ b/src/backy/file/__init__.py @@ -1,7 +1,7 @@ import shutil import sys import time -from argparse import ArgumentParser, Namespace +from argparse import _ActionsContainer from dataclasses import dataclass from pathlib import Path from typing import Any, Iterable @@ -22,11 +22,11 @@ def to_cmdargs(self) -> Iterable[str]: return [str(self.target)] @classmethod - def from_args(cls, args: Namespace) -> "FileRestoreArgs": - return cls(args.target) + def from_args(cls, **kw: Any) -> "FileRestoreArgs": + return cls(kw["target"]) @classmethod - def setup_argparse(cls, restore_parser: ArgumentParser) -> None: + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: restore_parser.add_argument( "target", type=Path, diff --git a/src/backy/logging.py b/src/backy/logging.py index 988ead79..ccf30de6 100644 --- a/src/backy/logging.py +++ b/src/backy/logging.py @@ -3,7 +3,6 @@ # repository for complete details. 
import io -import os import string import sys from pathlib import Path diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py index 365ff511..86d6a7ba 100644 --- a/src/backy/rbd/__init__.py +++ b/src/backy/rbd/__init__.py @@ -4,11 +4,11 @@ import sys import time import uuid -from argparse import ArgumentParser, Namespace +from argparse import _ActionsContainer from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import IO, Any, Iterable, Literal, Optional, Set, cast +from typing import IO, Any, Callable, Iterable, Literal, Optional, Set, cast import consulate from structlog.stdlib import BoundLogger @@ -19,7 +19,7 @@ from backy.report import ChunkMismatchReport from backy.repository import Repository from backy.revision import Revision, Trust -from backy.source import RestoreArgs, RestoreArgsType, Source +from backy.source import RestoreArgs, Source from backy.utils import ( CHUNK_SIZE, END, @@ -56,11 +56,11 @@ def to_cmdargs(self) -> Iterable[str]: return ["--backend", self.backend.value, self.target] @classmethod - def from_args(cls, args: Namespace) -> "RBDRestoreArgs": - return cls(args.target, args.restore_backend) + def from_args(cls, **kw: Any) -> "RBDRestoreArgs": + return cls(kw["target"], kw["restore_backend"]) @classmethod - def setup_argparse(cls, restore_parser: ArgumentParser) -> None: + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: restore_parser.add_argument( "--backend", type=RestoreBackend, @@ -149,14 +149,15 @@ def backup(self, revision: Revision) -> bool: else: source.full(file) with self.open(revision) as file: - report = source.verify(file) - if report: - self.repository.add_report(report) - verified = not report + verified = source.verify( + file, report=self.repository.add_report + ) except BackendException: self.log.exception("ceph-error-distrust-all") verified = False - self.repository.distrust("local", skip_lock=True) + self.repository.distrust( + self.repository.find_revisions("local"), skip_lock=True + ) if not verified: self.log.error( "verification-failed", @@ -527,7 +528,11 @@ def full(self, target: File) -> None: while buf := source.read(4 * backy.utils.MiB): target.write(buf) - def verify(self, target: File) -> Optional[ChunkMismatchReport]: + def verify( + self, + target: File, + report: Callable[[ChunkMismatchReport], None] = lambda _: None, + ) -> bool: s = self.rbd.image_reader( "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) ) @@ -535,7 +540,11 @@ def verify(self, target: File) -> Optional[ChunkMismatchReport]: with s as source: self.log.info("verify") - return backy.utils.files_are_roughly_equal(source, cast(IO, target)) + return backy.utils.files_are_roughly_equal( + source, + cast(IO, target), + report=lambda s, t, o: report(ChunkMismatchReport(s, t, o)), + ) def _delete_old_snapshots(self) -> None: # Clean up all snapshots except the one for the most recent valid diff --git a/src/backy/rbd/tests/test_ceph.py b/src/backy/rbd/tests/test_ceph.py index 9febfca2..9b8a40f2 100644 --- a/src/backy/rbd/tests/test_ceph.py +++ b/src/backy/rbd/tests/test_ceph.py @@ -5,6 +5,7 @@ import time from pathlib import Path from unittest import mock +from unittest.mock import Mock import consulate import pytest @@ -287,7 +288,9 @@ def test_verify_fail(ceph_rbd, rbdsource, repository, tmp_path, log): f.write(b"foobar") # The chunked store has false data, so this needs to be detected. 
with ceph_rbd(revision), rbdsource.open(revision) as target: - assert ceph_rbd.verify(target) + mock = Mock() + assert not ceph_rbd.verify(target, report=mock) + mock.assert_called_once() def test_verify(ceph_rbd, rbdsource, repository, tmp_path, log): @@ -308,7 +311,9 @@ def test_verify(ceph_rbd, rbdsource, repository, tmp_path, log): f.flush() with ceph_rbd(revision), rbdsource.open(revision) as target: - assert not ceph_rbd.verify(target) + mock = Mock() + assert ceph_rbd.verify(target, report=mock) + mock.assert_not_called() @pytest.fixture diff --git a/src/backy/rbd/tests/test_main.py b/src/backy/rbd/tests/test_main.py index 26b5ae69..5713fbea 100644 --- a/src/backy/rbd/tests/test_main.py +++ b/src/backy/rbd/tests/test_main.py @@ -14,11 +14,13 @@ @pytest.fixture def source_on_disk(tmp_path, log): - with open(tmp_path / "config", "w", encoding="utf-8") as f: + path = tmp_path / "test01" + path.mkdir() + with open(path / "config", "w", encoding="utf-8") as f: f.write( f""" --- -path: "{tmp_path}" +path: "{path}" schedule: daily: interval: 1d @@ -29,7 +31,7 @@ def source_on_disk(tmp_path, log): image: b """ ) - return CmdLineSource.load(tmp_path, log).create_source() + return CmdLineSource.load(path, log).create_source() def test_display_usage(capsys): @@ -38,8 +40,8 @@ def test_display_usage(capsys): out, err = capsys.readouterr() assert ( """\ -usage: pytest [-h] [-v] [-C WORKDIR] [-t TASKID] - {backup,restore,gc,verify} ... +usage: backy-rbd [-h] [-v] [-C WORKDIR] [-t TASKID] + {backup,restore,gc,verify} ... """ == out ) @@ -54,8 +56,8 @@ def test_display_help(capsys): assert ( Ellipsis( """\ -usage: pytest [-h] [-v] [-C WORKDIR] [-t TASKID] - {backup,restore,gc,verify} ... +usage: backy-rbd [-h] [-v] [-C WORKDIR] [-t TASKID] + {backup,restore,gc,verify} ... The rbd plugin for backy. You should not call this directly. Use the backy command instead. @@ -145,7 +147,9 @@ def test_call_fun( partialmethod(print_args, return_value=rv), ) utils.log_data = "" - exit = RBDSource.main("backy-rbd", "-v", *args) + exit = RBDSource.main( + "backy-rbd", "-v", "-C", str(source_on_disk.repository.path), *args + ) assert exit == rc out, err = capsys.readouterr() assert ( @@ -160,8 +164,8 @@ def test_call_fun( assert ( Ellipsis( f"""\ -... D - command/invoked args='backy-rbd -v {" ".join([ *args])}' -... D - repo/scan-reports entries=0 +... D - command/invoked args='backy-rbd -v -C ... {" ".join([ *args])}' +... D test01 repo/scan-reports entries=0 ... D - command/return-code code={rc} """ ) @@ -191,7 +195,7 @@ def do_raise(*args, **kw): Ellipsis( """\ ... D - command/invoked args='backy-rbd -C ... gc' -... D - repo/scan-reports entries=0 +... D test01 repo/scan-reports entries=0 ... 
E - command/failed exception_class='builtins.RuntimeError' exception_msg='test' exception>\tTraceback (most recent call last): exception>\t File ".../src/backy/source.py", line ..., in main diff --git a/src/backy/rbd/tests/test_source.py b/src/backy/rbd/tests/test_source.py index d41308e5..1bf749da 100644 --- a/src/backy/rbd/tests/test_source.py +++ b/src/backy/rbd/tests/test_source.py @@ -39,8 +39,8 @@ def full(self, file): assert self.data file.write(self.data) - def verify(self, target: IO): - assert self.data == target.read() + def verify(self, target: IO, report=None): + return self.data == target.read() @pytest.fixture diff --git a/src/backy/report.py b/src/backy/report.py index f3d9d669..17144c6f 100644 --- a/src/backy/report.py +++ b/src/backy/report.py @@ -1,28 +1,40 @@ -import dataclasses import datetime import hashlib import traceback -from dataclasses import dataclass, field +from abc import ABC, abstractmethod from pathlib import Path +from typing import Optional import shortuuid import yaml from structlog.stdlib import BoundLogger from yaml import SafeDumper -import backy +import backy.utils from backy.utils import SafeFile -@dataclass(frozen=True) -class ProblemReport: - uuid: str = field(init=False, default_factory=shortuuid.uuid) - timestamp: datetime.datetime = field( - init=False, default_factory=backy.utils.now - ) +class ProblemReport(ABC): + uuid: str + timestamp: datetime.datetime + + def __init__( + self, + uuid: Optional[str] = None, + timestamp: Optional[datetime.datetime] = None, + ): + self.uuid = uuid or shortuuid.uuid() + self.timestamp = timestamp or backy.utils.now() def to_dict(self) -> dict: - return dataclasses.asdict(self) + return { + "uuid": self.uuid, + "timestamp": self.timestamp, + } + + @abstractmethod + def get_message(self) -> str: + ... 
def store(self, dir: Path, log: BoundLogger) -> None:
         log.debug("store-report", uuid=self.uuid)
@@ -48,31 +60,33 @@ def representer(dumper, data):
         yaml.dump(self.to_dict(), f, sort_keys=False, Dumper=CustomDumper)
 
 
-@dataclass(frozen=True)
 class ChunkMismatchReport(ProblemReport):
     source_chunk: bytes
-    source_hash: str = field(init=False)
+    source_hash: str
     target_chunk: bytes
-    target_hash: str = field(init=False)
+    target_hash: str
     offset: int
-    traceback: str = field(init=False)
-
-    def __post_init__(self) -> None:
-        object.__setattr__(
-            self, "source_hash", hashlib.md5(self.source_chunk).hexdigest()
-        )
-        object.__setattr__(
-            self, "target_hash", hashlib.md5(self.target_chunk).hexdigest()
-        )
-        object.__setattr__(
-            self, "traceback", "".join(traceback.format_stack()).strip()
-        )
+    traceback: str
+
+    def __init__(self, source_chunk: bytes, target_chunk: bytes, offset: int):
+        super().__init__()
+        self.source_chunk = source_chunk
+        self.target_chunk = target_chunk
+        self.offset = offset
+        self.source_hash = hashlib.md5(self.source_chunk).hexdigest()
+        self.target_hash = hashlib.md5(self.target_chunk).hexdigest()
+        self.traceback = "".join(traceback.format_stack()).strip()
 
     def to_dict(self) -> dict:
-        dict = dataclasses.asdict(self)
-        del dict["source_chunk"]
-        del dict["target_chunk"]
-        return dict
+        return super().to_dict() | {
+            "source_hash": self.source_hash,
+            "target_hash": self.target_hash,
+            "offset": self.offset,
+            "traceback": self.traceback,
+        }
+
+    def get_message(self) -> str:
+        return f"Mismatching chunks at offset {self.offset}"
 
     def store(self, dir: Path, log: BoundLogger) -> None:
         chunks_path = dir / "chunks"
diff --git a/src/backy/repository.py b/src/backy/repository.py
index bdee86cd..25137629 100644
--- a/src/backy/repository.py
+++ b/src/backy/repository.py
@@ -4,11 +4,12 @@
 import re
 from math import ceil, floor
 from pathlib import Path
-from typing import IO, Any, List, Literal, Optional, TypedDict
+from typing import IO, Any, Iterable, List, Literal, Optional, TypedDict
 
 import tzlocal
 from structlog.stdlib import BoundLogger
 
+import backy
 from backy.utils import (
     duplicates,
     list_get,
@@ -73,7 +74,7 @@ def __init__(
         self.path = path.resolve()
         self.report_path = self.path / "quarantine"
         self.schedule = schedule
-        self.log = log.bind(subsystem="repo")
+        self.log = log.bind(subsystem="repo", job_name=self.name)
         self._lock_fds = {}
 
     def connect(self):
@@ -94,9 +95,35 @@ def add_report(self, report: ProblemReport) -> None:
         self.report_ids.append(report.uuid)
 
     def scan_reports(self) -> None:
-        self.report_ids = [g.name for g in self.report_path.glob("*.report")]
+        self.report_ids = [
+            g.name.removesuffix(".report")
+            for g in self.report_path.glob("*.report")
+        ]
         self.log.debug("scan-reports", entries=len(self.report_ids))
 
+    @property
+    def sla(self) -> bool:
+        """Is the SLA currently held?
+
+        The SLA being held only reflects the current status.
+
+        It does not help to reflect on past situations that have failed, as
+        those are not indicators of whether an admin needs to do something
+        right now.
+ """ + return not self.sla_overdue + + @property + def sla_overdue(self) -> int: + """Amount of time the SLA is currently overdue.""" + if not self.clean_history: + return 0 + age = backy.utils.now() - self.clean_history[-1].timestamp + max_age = min(x["interval"] for x in self.schedule.schedule.values()) + if age > max_age * 1.5: + return age.total_seconds() + return 0 + # Locking strategy: # # - You can only run one backup of a machine at a time, as the backup will @@ -276,8 +303,8 @@ def _clean(self) -> None: revision.remove() @locked(target=".backup", mode="exclusive") - def forget(self, revision: str) -> None: - for r in self.find_revisions(revision): + def rm(self, revs: Iterable[Revision]) -> None: + for r in revs: r.remove() @locked(target=".backup", mode="exclusive") @@ -319,10 +346,9 @@ def tags( return True @locked(target=".backup", mode="exclusive") - def distrust(self, revision: str) -> None: - revs = self.find_revisions(revision) - self.prevent_remote_rev(revs) + def distrust(self, revs: Iterable[Revision]) -> None: for r in revs: + assert not r.server r.distrust() r.write_info() diff --git a/src/backy/revision.py b/src/backy/revision.py index 9d59e1d3..3c49637e 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -125,6 +125,7 @@ def to_dict(self) -> dict: "server": self.server, } + # TODO: disallow local modification @property def pending_changes(self): return self.server and self.tags != self.orig_tags diff --git a/src/backy/s3/__init__.py b/src/backy/s3/__init__.py index 4cbb3021..a144c2f2 100644 --- a/src/backy/s3/__init__.py +++ b/src/backy/s3/__init__.py @@ -1 +1,54 @@ # Placeholder for future S3 implementation +from argparse import _ActionsContainer +from dataclasses import dataclass +from typing import Any, Iterable + +from structlog.stdlib import BoundLogger + +from backy.repository import Repository +from backy.revision import Revision +from backy.source import RestoreArgs, RestoreArgsType, Source, SourceType + + +@dataclass(frozen=True) +class S3RestoreArgs(RestoreArgs): + def to_cmdargs(self) -> Iterable[str]: + return [] + + @classmethod + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: + pass + + @classmethod + def from_args(cls: type[RestoreArgsType], **kw: Any) -> RestoreArgsType: + return cls() + + +class S3Source(Source): + type_ = "s3" + restore_type = S3RestoreArgs + + @classmethod + def from_config( + cls: type[SourceType], + repository: "Repository", + config: dict[str, Any], + log: BoundLogger, + ) -> SourceType: + raise NotImplementedError() + + def backup(self, revision: "Revision") -> bool: + raise NotImplementedError() + + def restore(self, revision: "Revision", args: RestoreArgsType): + raise NotImplementedError() + + def verify(self, revision: "Revision"): + raise NotImplementedError() + + def gc(self) -> None: + raise NotImplementedError() + + +def main(): + raise NotImplementedError() diff --git a/src/backy/schedule.py b/src/backy/schedule.py index 474ff9f4..e140ab9d 100644 --- a/src/backy/schedule.py +++ b/src/backy/schedule.py @@ -69,6 +69,12 @@ def configure(self, config: dict) -> None: def to_dict(self) -> dict: return self.config + @classmethod + def from_dict(cls, conf) -> "Schedule": + r = cls() + r.configure(conf) + return r + def next( self, relative: datetime.datetime, spread: int, repository: "Repository" ) -> Tuple[datetime.datetime, Set[str]]: diff --git a/src/backy/source.py b/src/backy/source.py index 6ddbf2ee..07fd9b8b 100644 --- a/src/backy/source.py +++ b/src/backy/source.py @@ -4,7 +4,7 @@ 
import filecmp import subprocess from abc import ABC, abstractmethod -from argparse import ArgumentParser, Namespace +from argparse import ArgumentParser, _ActionsContainer from dataclasses import dataclass from importlib.metadata import entry_points from pathlib import Path @@ -40,14 +40,12 @@ def to_cmdargs(self) -> Iterable[str]: @classmethod @abstractmethod - def setup_argparse(cls, restore_parser: ArgumentParser) -> None: + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: ... @classmethod @abstractmethod - def from_args( - cls: type[RestoreArgsType], args: Namespace - ) -> RestoreArgsType: + def from_args(cls: type[RestoreArgsType], **kw: Any) -> RestoreArgsType: ... @@ -124,7 +122,9 @@ def gc(self) -> None: @classmethod def create_argparse(cls) -> ArgumentParser: parser = argparse.ArgumentParser( - description=f"The {cls.type_} plugin for backy. You should not call this directly. Use the backy command instead.", + prog=f"backy-{cls.type_}", + description=f"The {cls.type_} plugin for backy.\n" + "You should not call this directly. Use the backy command instead.", ) parser.add_argument( "-v", "--verbose", action="store_true", help="verbose output" @@ -220,7 +220,10 @@ def main(cls, *str_args: str) -> int: ret = int(not success) case "restore": rev = source.repository.find_by_uuid(args.revision) - source.restore(rev, cls.restore_type.from_args(args)) + source.restore( + rev, + cls.restore_type.from_args(**dict(args._get_kwargs())), + ) case "gc": source.gc() case "verify": @@ -250,6 +253,10 @@ class CmdLineSource: def type_(self): return self.source_conf["type"] + @property + def restore_type(self): + return factory_by_type(self.type_).restore_type + @property def subcommand(self) -> str: return "backy-" + self.type_ diff --git a/src/backy/tests/test_report.py b/src/backy/tests/test_report.py index f7d82378..9f4dd2a2 100644 --- a/src/backy/tests/test_report.py +++ b/src/backy/tests/test_report.py @@ -1,11 +1,8 @@ -import pytest - from backy.report import ChunkMismatchReport from backy.tests import Ellipsis -@pytest.mark.skip -def test_quarantine(tmp_path, repository, log, clock): +def test_report(tmp_path, repository, log, clock): repository.add_report(ChunkMismatchReport(b"source", b"target", 3)) with open( (tmp_path / "quarantine" / repository.report_ids[0]).with_suffix( @@ -22,9 +19,9 @@ def test_quarantine(tmp_path, repository, log, clock): offset: 3 traceback: |- ... 
- File ".../src/backy/rbd/tests/test_quarantine.py", line ..., in test_quarantine - store.add_report(QuarantineReport(b"source", b"target", 3)) - File ".../src/backy/rbd/quarantine.py", line ..., in __init__ + File ".../src/backy/tests/test_report.py", line ..., in test_report + repository.add_report(ChunkMismatchReport(b"source", b"target", 3)) + File ".../src/backy/report.py", line ..., in __init__ self.traceback = "".join(traceback.format_stack()).strip() """ ) diff --git a/src/backy/tests/test_utils.py b/src/backy/tests/test_utils.py index 47738d2a..14c2b1ef 100644 --- a/src/backy/tests/test_utils.py +++ b/src/backy/tests/test_utils.py @@ -272,7 +272,7 @@ def test_roughly_compare_files_same(tmp_path): f.write(b"asdf" * 100) for x in range(20): - assert not files_are_roughly_equal( + assert files_are_roughly_equal( open("a", "rb"), open("b", "rb"), blocksize=10 ) @@ -288,7 +288,7 @@ def test_roughly_compare_files_1_changed_block(tmp_path): detected = 0 for x in range(20): - detected += not files_are_roughly_equal( + detected += files_are_roughly_equal( open("a", "rb"), open("b", "rb"), blocksize=10 ) @@ -304,11 +304,9 @@ def test_roughly_compare_files_timeout(tmp_path): # The files are different but we don't notice as we run into a timeout. # That's fine. - assert not files_are_roughly_equal( - open("a", "rb"), open("b", "rb"), timeout=0 - ) + assert files_are_roughly_equal(open("a", "rb"), open("b", "rb"), timeout=0) # Without the timeout we do notice - assert files_are_roughly_equal(open("a", "rb"), open("b", "rb")) + assert not files_are_roughly_equal(open("a", "rb"), open("b", "rb")) def test_copy_overwrite_correctly_makes_sparse_file(tmp_path): diff --git a/src/backy/utils.py b/src/backy/utils.py index 243aa380..d4c8fa2e 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -14,7 +14,9 @@ import time import typing from asyncio import Event -from typing import IO, Callable, Iterable, List, Literal, Optional, TypeVar +from json import JSONEncoder +from pathlib import Path +from typing import IO, Any, Callable, Iterable, List, Literal, Optional, TypeVar from zoneinfo import ZoneInfo import aiofiles.os as aos @@ -22,8 +24,6 @@ import structlog import tzlocal -import backy - from .ext_deps import CP _T = TypeVar("_T") @@ -389,7 +389,8 @@ def files_are_roughly_equal( samplesize=0.01, blocksize=CHUNK_SIZE, timeout=5 * 60, -) -> Optional["backy.report.ChunkMismatchReport"]: + report: Callable[[bytes, bytes, int], None] = lambda a, b, c: None, +) -> bool: a.seek(0, os.SEEK_END) size = a.tell() blocks = size // blocksize @@ -413,24 +414,22 @@ def files_are_roughly_equal( duration = now() - started if duration > max_duration: log.info("files-roughly-equal-stopped", duration=duration) - return None + return True a.seek(block * blocksize) b.seek(block * blocksize) chunk_a = a.read(blocksize) chunk_b = b.read(blocksize) if chunk_a != chunk_b: - report = backy.report.ChunkMismatchReport( - chunk_a, chunk_b, block * blocksize - ) log.error( "files-not-roughly-equal", - hash_a=report.source_hash, - hash_b=report.target_hash, - pos=report.offset, + hash_a=hashlib.md5(chunk_a).hexdigest(), + hash_b=hashlib.md5(chunk_b).hexdigest(), + pos=block * blocksize, ) - return report - return None + report(chunk_a, chunk_b, block * blocksize) + return False + return True def now(): @@ -648,3 +647,15 @@ def punch_hole(f, offset, len_): fallocate(*params) except OSError: _fake_fallocate(*params) + + +class BackyJSONEncoder(JSONEncoder): + def default(self, o: Any) -> Any: + if hasattr(o, "to_dict"): + 
return o.to_dict()
+        elif isinstance(o, datetime.datetime):
+            return o.isoformat()
+        elif isinstance(o, Path):
+            return str(o)
+        else:
+            return super().default(o)

From f4ff459d17914cc916c57c5cd7872a6fbd6b6e43 Mon Sep 17 00:00:00 2001
From: Christian Theune
Date: Mon, 12 Aug 2024 13:32:12 +0200
Subject: [PATCH 23/25] Document our ideas for the new architecture.

The main goal here is to prepare for S3 backups, which have a
fundamentally different structure.
---
 ARCHITECTURE.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 ARCHITECTURE.md

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
new file mode 100644
index 00000000..570cdbf4
--- /dev/null
+++ b/ARCHITECTURE.md
@@ -0,0 +1,48 @@
+
+# Daemon
+
+Backy has a daemon that is responsible for:
+
+1. Scheduling jobs in a timely manner according to their configuration.
+
+2. Providing an HTTP API that allows multiple backy servers to interact
+   with each other.
+
+There is a (PostgreSQL) database that stores metadata about backups and
+that both the daemon and the CLI (including the sources) interact with.
+
+# CLI
+
+There are two levels of CLI interaction:
+
+1. The main `backy` command lets administrators interact with the backy
+   environment on a server: retrieving status information, running backups,
+   restoring data, and performing maintenance tasks.
+
+2. Backy itself interacts with sources through a second layer of CLI commands,
+   specific to each source. They are called by the higher-level CLI as well as
+   by the daemon. We use this layering to allow sources to be implemented in
+   different languages.
+
+The CLI ideally does not interact with the daemon directly, but works by
+inspecting or updating the database.
+
+# Nomenclature
+
+Words within the context of backup software are a bit muddy, specifically
+the meaning of "a backup". We decided to take inspiration from the git dictionary
+and use it in the following way:
+
+1. A **repository** is - similar to git - the logical container for the user
+   data relevant to one thing that we are backing up.
+
+2. A **source** provides the data that should be backed up. Different kinds
+   of sources can model arbitrary data models: backy does not care whether
+   you are backing up virtual disk images or S3 buckets.
+
+3. A **revision** specifies the state of the source at a certain point in time
+   and corresponds to what would be colloquially called "a backup".
+
+4. The daemon uses a **job** for every repository to execute the steps necessary
+   to perform regular backups with all surrounding management tasks like
+   garbage collection, verification, etc.

From e32c7ef7414fd8942590b74faa082478ae494d42 Mon Sep 17 00:00:00 2001
From: Christian Theune
Date: Mon, 12 Aug 2024 13:39:14 +0200
Subject: [PATCH 24/25] changelog

---
 ...ct_PL_132755_refactor_module_and_subcommand_structure.rst | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 changelog.d/20240812_133802_ct_PL_132755_refactor_module_and_subcommand_structure.rst

diff --git a/changelog.d/20240812_133802_ct_PL_132755_refactor_module_and_subcommand_structure.rst b/changelog.d/20240812_133802_ct_PL_132755_refactor_module_and_subcommand_structure.rst
new file mode 100644
index 00000000..bc761592
--- /dev/null
+++ b/changelog.d/20240812_133802_ct_PL_132755_refactor_module_and_subcommand_structure.rst
@@ -0,0 +1,5 @@
+.. A new scriv changelog fragment.
+
+- Refactor the overall structure to prepare for more diverse sources:
+  use a sub-CLI pattern to talk to source implementations and clean up
+  the Ceph source, removing unused code. (PL-132755)

From 4324908fbbb9b3a274cc35b0e6923f6bc57e8050 Mon Sep 17 00:00:00 2001
From: Christian Theune
Date: Mon, 12 Aug 2024 13:43:50 +0200
Subject: [PATCH 25/25] style

---
 ARCHITECTURE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 570cdbf4..1405455f 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -37,7 +37,7 @@ and use it in the following way:
 
 2. A **source** provides the data that should be backed up. Different kinds
-   of sources can model arbitrary data models: backy does not care whether
+   of sources can model arbitrary data models: backy does not care whether
    you are backing up virtual disk images or S3 buckets.
 
 3. A **revision** specifies the state of the source at a certain point in time
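
As a concrete sketch of the two-level CLI described in ARCHITECTURE.md: each
repository is a directory whose `config` file names the schedule and the
source, as written by the test fixtures in this series. The directory name
below is hypothetical, and the pool/image values are illustrative
placeholders for the rbd-specific fields (cf. `CephRBD.pool` / `CephRBD.image`):

    path: "/srv/backy/test01"
    schedule:
      daily:
        interval: 1d
    source:
      type: rbd        # selects the source plugin and thus the sub-CLI
      pool: rbd        # assumed rbd-specific parameter
      image: test01    # assumed rbd-specific parameter

The `type` field determines which source-specific command the top-level CLI
and the daemon spawn against this directory (`CmdLineSource.subcommand`
prepends "backy-"), e.g. `backy-rbd -C /srv/backy/test01 gc`, matching the
`{backup,restore,gc,verify}` subcommands shown in the usage output above.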
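The SLA check that this series moves from the scheduler onto `Repository`
boils down to a single comparison: the newest clean revision may be at most
1.5 times the smallest configured interval old. A minimal, runnable sketch of
the arithmetic, where the 24-hour interval is just an example mirroring
`test_sla_over_time` above:

    import datetime

    # Smallest interval over all schedule entries, per Repository.sla_overdue.
    max_age = datetime.timedelta(hours=24)
    # Age of the newest clean revision: one second past the 36h grace window.
    age = datetime.timedelta(hours=36, seconds=1)

    overdue = age.total_seconds() if age > max_age * 1.5 else 0
    print(overdue)  # 129601.0 -> sla_overdue is non-zero, so repository.sla is False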
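Similarly, the verification path now inverts control: `files_are_roughly_equal`
returns a plain bool and hands mismatching chunks to a callback instead of
constructing a `ChunkMismatchReport` itself. A sketch of how a source wires
this up, following the `CephRBD.verify` change above; the function name
`verify_image` is illustrative only:

    from backy.report import ChunkMismatchReport
    from backy.utils import files_are_roughly_equal

    def verify_image(source_file, target_file, add_report):
        # add_report is typically Repository.add_report; the callback
        # receives (source_chunk, target_chunk, offset) on each mismatch.
        return files_are_roughly_equal(
            source_file,
            target_file,
            report=lambda s, t, o: add_report(ChunkMismatchReport(s, t, o)),
        )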