diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 00000000..1405455f --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,48 @@ + +# Daemon + +Backy has a daemon that is responsible for: + +1. Scheduling jobs in a timely manner according to their configuration. + +2. Providing an HTTP API that allows multiple backy servers to interact with each + other. + +There is a (PostgreSQL) database that stores metadata about backups and that both +the daemon and the CLI (including the sources) interact with. + +# CLI + +There are two levels of CLI interaction: + +1. The main `backy` command lets administrators interact with the backy + environment on a server to retrieve status information, + run backups, restore data and perform maintenance tasks. + +2. Backy itself interacts with sources through a second layer of CLI commands, + specific to each source. They are called by the higher-level CLI as well as + by the daemon. We use this layering to allow sources to be implemented in + different languages. + +The CLI ideally does not interact with the daemon directly, but inspects +or updates the database instead. + +# Nomenclature + +Terminology in the context of backup software is a bit muddy, specifically +the meaning of "a backup". We decided to take inspiration from git's vocabulary +and use it in the following way: + +1. A **repository** is - similar to git - the logical container for the user + data relevant to one thing that we are backing up. + +2. A **source** provides the data that should be backed up. Different kinds + of sources can represent arbitrary data models: backy does not care whether + you are backing up virtual disk images or S3 buckets. + +3. A **revision** specifies the state of the source at a certain point in time + and corresponds to what would colloquially be called "a backup". + +4. The daemon uses a **job** for every repository to execute the steps necessary + to perform regular backups along with all surrounding management tasks like + garbage collection, verification, etc. diff --git a/changelog.d/20240812_133802_ct_PL_132755_refactor_module_and_subcommand_structure.rst b/changelog.d/20240812_133802_ct_PL_132755_refactor_module_and_subcommand_structure.rst new file mode 100644 index 00000000..bc761592 --- --- /dev/null +++ b/changelog.d/20240812_133802_ct_PL_132755_refactor_module_and_subcommand_structure.rst @@ -0,0 +1,5 @@ +.. A new scriv changelog fragment. + +- Refactor the overall structure to prepare for more diverse sources: + use a sub-CLI pattern to talk to source implementations and clean up + the Ceph source, removing unused code. 
(PL-132755) diff --git a/lib.nix b/lib.nix index bb27f872..c963c2fe 100644 --- a/lib.nix +++ b/lib.nix @@ -127,7 +127,8 @@ in devShells = { default = mkShellNoCC { - BACKY_CMD = "${poetryEnv}/bin/backy"; + BACKY_CLI_CMD = "${poetryEnv}/bin/backy"; + BACKY_RBD_CMD = "${poetryEnv}/bin/backy-rbd"; packages = [ poetryEnv poetry diff --git a/pyproject.toml b/pyproject.toml index 3a9af314..9dac99a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,5 +75,24 @@ zest-releaser = "^9.1.1" requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" +[tool.poetry.plugins.'backy.sources'] +file = 'backy.file:FileSource' +rbd = 'backy.rbd:RBDSource' +s3 = 'backy.s3:S3Source' + [tool.poetry.scripts] -backy = "backy.main:main" +backy = "backy.cli:main" +backyd = "backy.daemon:main" +backy-rbd = "backy.rbd:main" +backy-s3 = "backy.s3:main" +backy-file = "backy.file:main" + +[[tool.mypy.overrides]] +module = "backy.*" +check_untyped_defs = true + +[tool.zest-releaser] +prereleaser.before = [ "release_helper.ignore_history_file" ] +prereleaser.middle = [ "release_helper.update_poetry_version release_helper.scriv_collect" ] +postreleaser.before = [ "release_helper.ignore_history_file" ] +postreleaser.middle = [ "release_helper.update_poetry_version" ] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 97197c5e..00000000 --- a/setup.cfg +++ /dev/null @@ -1,20 +0,0 @@ -[upload_sphinx] -upload-dir = build/doc - -[yapf] -based_on_style = pep8 -column_limit = 79 -split_before_expression_after_opening_paren = true -split_before_closing_bracket = false -SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = false -BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true - -[flake8] -max-line-length = 80 -extend-ignore = E203 - -[zest.releaser] -prereleaser.before = release_helper.ignore_history_file -prereleaser.middle = release_helper.update_poetry_version release_helper.scriv_collect -postreleaser.before = release_helper.ignore_history_file -postreleaser.middle = release_helper.update_poetry_version diff --git a/setup.py b/setup.py deleted file mode 100644 index feb0069c..00000000 --- a/setup.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Block-based backup and restore utility for virtual machine images""" - -import codecs -import glob -import os.path as p -import subprocess -import sys - -from setuptools import Command, find_packages, setup - - -class PyTest(Command): - """Invoke py.test from `bin/python setup.py test`.""" - - user_options = [] # type: ignore - - def initialize_options(self): - return None - - def finalize_options(self): - return None - - def run(self): - errno = subprocess.call( - [ - sys.executable, - p.join(p.dirname(__file__), "bin", "py.test"), - "-m1", - ] - ) - raise SystemExit(errno) - - -def open_project_path(filename): - fullname = p.join(p.dirname(__file__), filename) - return codecs.open(fullname, encoding="ascii") - - -def long_desc(): - parts = [] - for name in ("README.txt", "CHANGES.txt"): - with open_project_path(name) as f: - parts.append(f.read()) - return "\n".join(parts) - - -setup( - name="backy", - version="2.6.0.dev0", - install_requires=[ - "consulate", - "packaging", - "tzlocal", - "PyYaml", - "setuptools", - "shortuuid", - "python-lzo", - "humanize", - "mmh3", - "structlog", - "aiohttp", - "rich", - ], - extras_require={ - "test": [ - "pytest", - "pytest-asyncio", - "pytest-cache", - "pytest-cov", - "pytest-flake8", - "pytest-timeout", - ], - }, - entry_points=""" - [console_scripts] - backy = backy.main:main - """, - author=( - "Christian Theune , " - 
"Christian Kauhaus , " - "Daniel Kraft " - ), - author_email="mail@flyingcircus.io", - license="GPL-3", - url="https://bitbucket.org/flyingcircus/backy", - keywords="backup", - classifiers="""\ -Development Status :: 5 - Production/Stable -Environment :: Console -Intended Audience :: System Administrators -License :: OSI Approved :: GNU General Public License v3 (GPLv3) -Operating System :: POSIX -Programming Language :: Python -Programming Language :: Python :: 3 -Programming Language :: Python :: 3.6 -Programming Language :: Python :: 3.7 -Programming Language :: Python :: 3.8 -Programming Language :: Python :: 3.9 -Topic :: System :: Archiving :: Backup -"""[ - :-1 - ].split( - "\n" - ), - description=__doc__.strip(), - long_description=long_desc(), - packages=find_packages("src"), - package_dir={"": "src"}, - include_package_data=True, - data_files=[("", glob.glob("*.txt"))], - zip_safe=False, - cmdclass={"test": PyTest}, -) diff --git a/src/backy/backends/__init__.py b/src/backy/backends/__init__.py deleted file mode 100644 index a19d7584..00000000 --- a/src/backy/backends/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -from abc import ABC, abstractmethod -from typing import IO, TYPE_CHECKING, Optional, Type - -from structlog.stdlib import BoundLogger - -if TYPE_CHECKING: - from backy.revision import Revision - - -class BackendException(IOError): - pass - - -class BackyBackend(ABC): - @abstractmethod - def __init__(self, revision: "Revision", log: BoundLogger) -> None: - ... - - @abstractmethod - def open(self, mode: str = "rb", parent: Optional["Revision"] = None) -> IO: - ... - - def purge(self) -> None: - pass - - def verify(self) -> None: - pass - - -def select_backend(type_: str) -> Type[BackyBackend]: - match type_: - case "chunked": - from backy.backends.chunked import ChunkedFileBackend - - return ChunkedFileBackend - case "cowfile": - from backy.backends.cowfile import COWFileBackend - - return COWFileBackend - case _: - raise ValueError(f"Invalid backend '{type_}'") diff --git a/src/backy/backends/chunked/__init__.py b/src/backy/backends/chunked/__init__.py deleted file mode 100644 index 0fbf74c5..00000000 --- a/src/backy/backends/chunked/__init__.py +++ /dev/null @@ -1,123 +0,0 @@ -from pathlib import Path -from typing import Optional, Set - -from structlog.stdlib import BoundLogger - -from backy.revision import Revision, Trust -from backy.utils import END, report_status - -from .. import BackyBackend -from .chunk import Chunk, Hash -from .file import File -from .store import Store - - -class ChunkedFileBackend(BackyBackend): - # multiple Backends may share the same store - STORES: dict[Path, Store] = dict() - - def __init__(self, revision: Revision, log: BoundLogger): - assert revision.backend_type == "chunked" - self.backup = revision.backup - self.revision = revision - path = self.backup.path / "chunks" - if path not in self.STORES: - self.STORES[path] = Store(self.backup.path / "chunks", log) - self.store = self.STORES[path] - self.log = log.bind(subsystem="chunked") - - def open(self, mode: str = "rb", parent: Optional[Revision] = None) -> File: # type: ignore[override] - if "w" in mode or "+" in mode: - if parent and not self.revision.filename.exists(): - with self.revision.filename.open( - "wb" - ) as new, parent.filename.open("rb") as old: - # This is ok, this is just metadata, not the actual data. 
- new.write(old.read()) - overlay = False - if mode == "o": - mode = "rw" - overlay = True - file = File(self.revision.filename, self.store, mode, overlay) - - if file.writable() and self.backup.contains_distrusted: - # "Force write"-mode if any revision is distrusted. - self.log.warn("forcing-full") - self.store.force_writes = True - - return file - - def purge(self) -> None: - self.log.debug("purge") - used_chunks: Set[Hash] = set() - for revision in self.backup.local_history: - if revision.backend_type != "chunked": - continue - used_chunks.update( - type(self)(revision, self.log).open()._mapping.values() - ) - self.store.purge(used_chunks) - - @report_status - def verify(self): - log = self.log.bind(revision_uuid=self.revision.uuid) - log.info("verify-start") - verified_chunks: Set[Hash] = set() - - # Load verified chunks to avoid duplicate work - for revision in self.backup.get_history(clean=True, local=True): - if ( - revision.trust != Trust.VERIFIED - or revision.backend_type != "chunked" - ): - continue - verified_chunks.update( - type(self)(revision, self.log).open()._mapping.values() - ) - - log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks)) - - errors = False - # Go through all chunks and check them. Delete problematic ones. - f = self.open() - hashes = set(f._mapping.values()) - verified_chunks - yield len(hashes) + 2 - for candidate in hashes: - yield - if candidate in verified_chunks: - continue - try: - c = Chunk(self.store, candidate) - c._read_existing() - except Exception: - log.exception("verify-error", chunk=candidate) - errors = True - if self.store.chunk_path(candidate).exists(): - try: - self.store.chunk_path(candidate).unlink() - except Exception: - log.exception("verify-remove-error", chunk=candidate) - # This is an optimisation: we can skip this revision, purge it - # and then keep verifying other chunks. This avoids checking - # things unnecessarily in duplicate. - # And we only mark it as verified if we never saw any problems. - break - - yield - - if errors: - # Found any issues? Delete this revision as we can't trust it. - self.revision.remove() - else: - # No problems found - mark as verified. - self.revision.verify() - self.revision.write_info() - - yield - - # Purge to ensure that we don't leave unused, potentially untrusted - # stuff around, especially if this was the last revision. 
- self.purge() - - yield END - yield None diff --git a/src/backy/backends/chunked/tests/test_backend.py b/src/backy/backends/chunked/tests/test_backend.py deleted file mode 100644 index c93c2362..00000000 --- a/src/backy/backends/chunked/tests/test_backend.py +++ /dev/null @@ -1,51 +0,0 @@ -import os - -import pytest - -from backy.backends.chunked import ChunkedFileBackend -from backy.revision import Revision - - -def test_overlay(simple_file_config, log): - r = Revision.create(simple_file_config, set(), log) - assert isinstance(r.backend, ChunkedFileBackend) - # Write 1 version to the file - f = r.backend.open("w") - f.write(b"asdf") - f.close() - with r.backend.open("r") as f: - assert f.read() == b"asdf" - # Open the file in overlay, write to it - f = r.backend.open("o") - assert f.read() == b"asdf" - f.seek(0) - f.write(b"bsdf") - f.seek(0) - assert f.read() == b"bsdf" - f.close() - # Close the file and open it again results in the original content - f = r.backend.open("r") - assert f.read() == b"asdf" - f.close() - - -def test_purge(simple_file_config, log): - b = simple_file_config - r = Revision.create(b, set(), log) - # Write 1 version to the file - f = r.backend.open("w") - f.write(b"asdf") - f.close() - r.materialize() - remote = Revision(b, log) # remote revision without local data - remote.server = "remote" - remote.materialize() - b.scan() - # Reassign as the scan will create a new reference - r = b.history[0] - assert len(list(r.backend.store.ls())) == 1 - r.backend.purge() - assert len(list(r.backend.store.ls())) == 1 - r.remove() - r.backend.purge() - assert len(list(r.backend.store.ls())) == 0 diff --git a/src/backy/backends/cowfile.py b/src/backy/backends/cowfile.py deleted file mode 100644 index a6ee3307..00000000 --- a/src/backy/backends/cowfile.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import IO, Optional - -from structlog.stdlib import BoundLogger - -from backy.backends import BackyBackend -from backy.revision import Revision -from backy.utils import CHUNK_SIZE, cp_reflink - - -class COWFileBackend(BackyBackend): - revision: Revision - - def __init__(self, revision: Revision, log: BoundLogger): - assert revision.backend_type == "cowfile" - self.revision = revision - - def open(self, mode: str = "rb", parent: Optional[Revision] = None) -> IO: - if not self.revision.filename.exists(): - if not parent: - self.revision.filename.open("wb").close() - else: - cp_reflink(parent.filename, self.revision.filename) - self.revision.writable() - return self.revision.filename.open(mode, buffering=CHUNK_SIZE) diff --git a/src/backy/backup.py b/src/backy/backup.py deleted file mode 100644 index a50d98a8..00000000 --- a/src/backy/backup.py +++ /dev/null @@ -1,911 +0,0 @@ -import asyncio -import datetime -import fcntl -import os -import re -import subprocess -import time -from collections import defaultdict -from enum import Enum -from math import ceil, floor -from pathlib import Path -from typing import IO, List, Literal, Optional, Type - -import tzlocal -import yaml -from aiohttp import ClientConnectionError, ClientError, ClientResponseError -from aiohttp.web_exceptions import HTTPForbidden, HTTPNotFound -from structlog.stdlib import BoundLogger - -import backy.backends.chunked -from backy.utils import ( - duplicates, - list_get, - list_rindex, - list_split, - min_date, - unique, -) - -from .backends import BackendException, BackyBackend, select_backend -from .client import APIClient, APIClientManager -from .ext_deps import BACKY_EXTRACT -from .quarantine import QuarantineStore 
-from .revision import Revision, Trust, filter_schedule_tags -from .schedule import Schedule -from .sources import BackySourceFactory, select_source -from .utils import CHUNK_SIZE, copy, posix_fadvise - -# Locking strategy: -# -# - You can only run one backup of a machine at a time, as the backup will -# interact with this machines' list of snapshots and will get confused -# if run in parallel. -# - You can restore while a backup is running. -# - You can only purge while nothing else is happening. -# - Trying to get a shared lock (specifically purge) will block and wait -# whereas trying to get an exclusive lock (running backups, purging) will -# immediately give up. -# - Locking is not re-entrant. It's forbidden and protected to call another -# locking main function. - - -class RestoreBackend(Enum): - AUTO = "auto" - PYTHON = "python" - RUST = "rust" - - def __str__(self): - return self.value - - -def locked(target=None, mode=None): - if mode == "shared": - mode = fcntl.LOCK_SH - elif mode == "exclusive": - mode = fcntl.LOCK_EX | fcntl.LOCK_NB - else: - raise ValueError("Unknown lock mode '{}'".format(mode)) - - def wrap(f): - def locked_function(self, *args, skip_lock=False, **kw): - if skip_lock: - return f(self, *args, **kw) - if target in self._lock_fds: - raise RuntimeError("Bug: Locking is not re-entrant.") - target_path = self.path / target - if not target_path.exists(): - target_path.touch() - self._lock_fds[target] = target_path.open() - try: - fcntl.flock(self._lock_fds[target], mode) - except BlockingIOError: - self.log.warning( - "lock-no-exclusive", - _fmt_msg="Failed to get exclusive lock for '{function}'.", - function=f.__name__, - ) - raise - else: - try: - return f(self, *args, **kw) - finally: - fcntl.flock(self._lock_fds[target], fcntl.LOCK_UN) - finally: - self._lock_fds[target].close() - del self._lock_fds[target] - - locked_function.__name__ = "locked({}, {})".format(f.__name__, target) - return locked_function - - return wrap - - -class Backup(object): - """A backup of a VM. - - Provides access to methods to - - - backup, restore, and list revisions - - """ - - path: Path - config: dict - schedule: Schedule - source: BackySourceFactory - default_backend_type: Literal["cowfile", "chunked"] - history: list[Revision] - quarantine: QuarantineStore - log: BoundLogger - - _by_uuid: dict[str, Revision] - _lock_fds: dict[str, IO] - - def __init__(self, path: Path, log: BoundLogger): - self.log = log.bind(subsystem="backup") - self._lock_fds = {} - - self.path = path.resolve() - self.scan() - - # Load config from file - try: - with self.path.joinpath("config").open(encoding="utf-8") as f: - self.config = yaml.safe_load(f) - except IOError: - self.log.error( - "could-not-read-config", - _fmt_msg="Could not read config file. Is --backupdir correct?", - config_path=str(self.path / "config"), - ) - raise - - # Initialize our source - try: - source_factory = select_source(self.config["source"]["type"]) - except IndexError: - self.log.error( - "source-type-unavailable", - _fmt_msg="No source type named `{type}` exists.", - type=self.config["source"]["type"], - ) - raise - self.source = source_factory(self.config["source"], self.log) - - # Initialize our backend - self.default_backend_type = self.config["source"].get("backend", None) - if self.default_backend_type is None: - if not self.local_history: - # Start fresh backups with our new default. - self.default_backend_type = "chunked" - else: - # Choose to continue existing backups with whatever format - # they are in. 
- self.default_backend_type = self.local_history[-1].backend_type - - self.schedule = Schedule() - self.schedule.configure(self.config["schedule"]) - - self.quarantine = QuarantineStore(self.path, self.log) - - @property - def name(self) -> str: - return self.path.name - - def to_dict(self): - return self.config - - def scan(self) -> None: - self.history = [] - self._by_uuid = {} - for f in self.path.glob("*.rev"): - if f.is_symlink(): - # Ignore links that are used to create readable pointers - continue - r = Revision.load(f, self, self.log) - if r.uuid not in self._by_uuid: - self._by_uuid[r.uuid] = r - self.history.append(r) - # The history is stored: oldest first. newest last. - self.history.sort(key=lambda r: r.timestamp) - - def touch(self): - self.path.touch() - - def set_purge_pending(self): - self.path.joinpath(".purge_pending").touch() - - def clear_purge_pending(self): - self.path.joinpath(".purge_pending").unlink(missing_ok=True) - - def get_history( - self, *, clean: bool = False, local: bool = False - ) -> list[Revision]: - return [ - rev - for rev in self.history - if (not clean or "duration" in rev.stats) - and (not local or not rev.server) - ] - - @property - def clean_history(self) -> List[Revision]: - """History without incomplete revisions.""" - return self.get_history(clean=True) - - @property - def local_history(self): - """History without incomplete revisions.""" - return self.get_history(local=True) - - @property - def contains_distrusted(self) -> bool: - return any( - ( - r == Trust.DISTRUSTED - for r in self.get_history(clean=True, local=True) - ) - ) - - def validate_tags(self, tags): - missing_tags = ( - filter_schedule_tags(tags) - self.schedule.schedule.keys() - ) - if missing_tags: - self.log.error( - "unknown-tags", - _fmt_msg="The following tags are missing from the schedule: {unknown_tags}\n" - "Check the config file, add the `manual:` prefix or disable tag validation (-f)", - unknown_tags=", ".join(missing_tags), - ) - raise RuntimeError("Unknown tags") - - def warn_pending_changes(self, revs: Optional[List[Revision]] = None): - revs = revs if revs is not None else self.history - pending = [r for r in revs if r.pending_changes] - if pending: - self.log.warning( - "pending-changes", - _fmt_msg="Synchronize with remote server (backy push) or risk loosing changes", - revisions=",".join(r.uuid for r in pending), - ) - - def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): - revs = revs if revs is not None else self.history - remote = [r for r in revs if r.server] - if remote: - self.log.error( - "remote-revs-disallowed", - _fmt_msg="Can not modify trust state of remote revisions locally.\n" - "Either include a filter to exclude them (local)\n" - "or edit them on the origin server and pull the changes (backy pull)", - revisions=",".join(r.uuid for r in remote), - ) - raise RuntimeError("Remote revs disallowed") - - ################# - # Making backups - - @locked(target=".backup", mode="exclusive") - def _clean(self) -> None: - """Clean-up incomplete revisions.""" - for revision in self.local_history: - if "duration" not in revision.stats: - self.log.warning( - "clean-incomplete", revision_uuid=revision.uuid - ) - revision.remove() - - @locked(target=".backup", mode="exclusive") - def forget(self, revision: str) -> None: - for r in self.find_revisions(revision): - r.remove() - - @locked(target=".backup", mode="exclusive") - def expire(self): - self.schedule.expire(self) - - @locked(target=".backup", mode="exclusive") - def tags( - self, - 
action: Literal["set", "add", "remove"], - revision: str, - tags: set[str], - expect: Optional[set[str]] = None, - autoremove: bool = False, - force=False, - ) -> bool: - self.scan() - revs = self.find_revisions(revision) - if not force and action != "remove": - self.validate_tags(tags) - for r in revs: - if expect is not None and expect != r.tags: - self.log.error("tags-expectation-failed") - return False - for r in revs: - match action: - case "set": - r.tags = tags - case "add": - r.tags |= tags - case "remove": - r.tags -= tags - case _: - raise ValueError(f"invalid action '{action}'") - if not r.tags and autoremove: - r.remove() - else: - r.write_info() - return True - - @locked(target=".backup", mode="exclusive") - @locked(target=".purge", mode="shared") - def backup(self, tags: set[str], force: bool = False) -> bool: - if not force: - self.validate_tags(tags) - - self.path.joinpath("last").unlink(missing_ok=True) - self.path.joinpath("last.rev").unlink(missing_ok=True) - - start = time.time() - - if not self.source.ready(): - raise RuntimeError( - "Source is not ready (does it exist? can you access it?)" - ) - - new_revision = Revision.create(self, tags, self.log) - new_revision.materialize() - self.log.info( - "created-revision", - revision_uuid=new_revision.uuid, - tags=", ".join(new_revision.tags), - ) - - backend = new_revision.backend - with self.source(new_revision) as source: - try: - source.backup(backend) - verified = source.verify(backend) - except BackendException: - self.log.exception("backend-error-distrust-all") - verified = False - self.distrust("local", skip_lock=True) - if not verified: - self.log.error( - "verification-failed", - revision_uuid=new_revision.uuid, - ) - new_revision.remove() - else: - self.log.info( - "verification-ok", revision_uuid=new_revision.uuid - ) - new_revision.stats["duration"] = time.time() - start - new_revision.write_info() - new_revision.readonly() - self.scan() - # Switched from a fine-grained syncing mechanism to "everything - # once" when we're done. This is as safe but much faster. - os.sync() - - # If there are distrusted revisions, then perform at least one - # verification after a backup - for good measure and to keep things - # moving along automatically. This could also be moved into the - # scheduler. - self.scan() - for revision in reversed(self.get_history(clean=True, local=True)): - if revision.trust == Trust.DISTRUSTED: - self.log.warning("inconsistent") - revision.backend.verify() - break - return verified - - @locked(target=".backup", mode="exclusive") - def distrust(self, revision: str) -> None: - revs = self.find_revisions(revision) - self.prevent_remote_rev(revs) - for r in revs: - r.distrust() - r.write_info() - - @locked(target=".purge", mode="shared") - def verify(self, revision: str) -> None: - revs = self.find_revisions(revision) - self.prevent_remote_rev(revs) - for r in revs: - r.backend.verify() - - @locked(target=".purge", mode="exclusive") - def purge(self) -> None: - self.local_history[-1].backend.purge() - self.clear_purge_pending() - - ################# - # Restoring - - # This needs no locking as it's only a wrapper for restore_file and - # restore_stdout and locking isn't re-entrant. 
- def restore( - self, - revision: str, - target: str, - restore_backend: RestoreBackend = RestoreBackend.AUTO, - ) -> None: - r = self.find(revision) - s = r.backend.open("rb") - if restore_backend == RestoreBackend.AUTO: - if self.backy_extract_supported(s): - restore_backend = RestoreBackend.RUST - else: - restore_backend = RestoreBackend.PYTHON - self.log.info("restore-backend", backend=restore_backend.value) - if restore_backend == RestoreBackend.PYTHON: - with s as source: - if target != "-": - self.restore_file(source, target) - else: - self.restore_stdout(source) - elif restore_backend == RestoreBackend.RUST: - self.restore_backy_extract(r, target) - - def backy_extract_supported(self, file: IO) -> bool: - log = self.log.bind(subsystem="backy-extract") - if not isinstance(file, backy.backends.chunked.File): - log.debug("unsupported-backend") - return False - if file.size % CHUNK_SIZE != 0: - log.debug("not-chunk-aligned") - return False - try: - version = subprocess.check_output( - [BACKY_EXTRACT, "--version"], encoding="utf-8", errors="replace" - ) - if not version.startswith("backy-extract"): - log.debug("unknown-version") - return False - except: - log.debug("unavailable") - return False - return True - - # backy-extract acquires lock - def restore_backy_extract(self, rev: Revision, target: str) -> None: - log = self.log.bind(subsystem="backy-extract") - cmd = [BACKY_EXTRACT, str(self.path / rev.uuid), target] - log.debug("started", cmd=cmd) - proc = subprocess.Popen(cmd) - return_code = proc.wait() - log.info( - "finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - if return_code: - raise RuntimeError( - f"backy-extract failed with return code {return_code}. Maybe try `--backend python`?" - ) - - @locked(target=".purge", mode="shared") - def restore_file(self, source: IO, target_name: str) -> None: - """Bulk-copy from open revision `source` to target file.""" - self.log.debug("restore-file", source=source.name, target=target_name) - open(target_name, "ab").close() # touch into existence - with open(target_name, "r+b", buffering=CHUNK_SIZE) as target: - try: - posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore - except Exception: - pass - copy(source, target) - - @locked(target=".purge", mode="shared") - def restore_stdout(self, source: IO) -> None: - """Emit restore data to stdout (for pipe processing).""" - self.log.debug("restore-stdout", source=source.name) - try: - posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) # type: ignore - except Exception: - pass - with os.fdopen(os.dup(1), "wb") as target: - while True: - chunk = source.read(CHUNK_SIZE) - if not chunk: - break - target.write(chunk) - - @locked(target=".purge", mode="shared") - def upgrade(self) -> None: - """Upgrade this backup's store from cowfile to chunked. - - This can take a long time and is intended to be interruptable. - - We start creating new backups with the new format once everything - is converted as we do not want to interfere with the config file - but allow upgrading without a format specification. 
- - """ - from backy.backends.chunked import ChunkedFileBackend - from backy.sources.file import File - - last_worklist: List[Revision] = [] - - while True: - self.scan() - to_upgrade: List[Revision] = [ - r - for r in self.get_history(clean=True, local=True) - if r.backend_type == "cowfile" - ] - if not to_upgrade: - break - if to_upgrade == last_worklist: - self.log.error("upgrade-no-progress") - break - last_worklist = to_upgrade - - self.log.info("upgrade-found-new", num_revisions=len(to_upgrade)) - # Upgrade the newest then start again. The revisions may change - # beneath us and this may cause a) new revisions to appear and b) - # old revisions to disappear. We want to upgraded new revisions as - # quickly as possible as having the newest upgraded means that - # then next backup will be able to use the new format and we don't - # have to re-upgrade it again. - try: - revision = to_upgrade[-1] - self.log.info( - "upgrade-converting", - revision_uuid=revision.uuid, - timestamp=revision.timestamp, - ) - original_file = revision.filename.with_suffix( - revision.filename.suffix + ".old" - ) - if not os.path.exists(original_file): - # We may be resuming a partial upgrade. Only move the file - # if our .old doesn't exist. - os.rename(revision.filename, original_file) - else: - self.log.info("upgrade-resuming") - if os.path.exists(revision.filename): - os.unlink(revision.filename) - revision.writable() - chunked = ChunkedFileBackend(revision, self.log) - file = File(dict(filename=original_file, cow=False), self.log)( - revision - ) - # Keep a copy of the statistics as it will get replaced when - # running the full copy. - original_stats = revision.stats.copy() - with file as f: - f.backup(chunked) - revision.stats = original_stats - revision.backend_type = "chunked" - revision.write_info() - revision.readonly() - os.unlink(original_file) - except Exception: - self.log.exception("upgrade-error") - # We may be seeing revisions getting removed, try again. - return - - # Wait a bit, to be graceful to the host system just in case this - # truns into a spinning loop. - time.sleep(5) - - ###################### - # Looking up revisions - - def last_by_tag(self) -> dict[str, datetime.datetime]: - """Return a dictionary showing the last time each tag was - backed up. - - Tags that have never been backed up won't show up here. - - """ - last_times: dict[str, datetime.datetime] = {} - for revision in self.clean_history: - for tag in revision.tags: - last_times.setdefault(tag, min_date()) - last_times[tag] = max([last_times[tag], revision.timestamp]) - return last_times - - def find_revisions( - self, spec: str | List[str | Revision | List[Revision]] - ) -> List[Revision]: - """Get a sorted list of revisions, oldest first, that match the given - specification. 
- """ - - tokens: List[str | Revision | List[Revision]] - if isinstance(spec, str): - tokens = [ - t.strip() - for t in re.split(r"(\(|\)|,|&|\.\.)", spec) - if t.strip() - ] - else: - tokens = spec - if "(" in tokens and ")" in tokens: - i = list_rindex(tokens, "(") - j = tokens.index(")", i) - prev, middle, next = tokens[:i], tokens[i + 1 : j], tokens[j + 1 :] - - functions = { - "first": lambda x: x[0], - "last": lambda x: x[-1], - "not": lambda x: [r for r in self.history if r not in x], - "reverse": lambda x: list(reversed(x)), - } - if prev and isinstance(prev[-1], str) and prev[-1] in functions: - return self.find_revisions( - prev[:-1] - + [functions[prev[-1]](self.find_revisions(middle))] - + next - ) - return self.find_revisions( - prev + [self.find_revisions(middle)] + next - ) - elif "," in tokens: - i = tokens.index(",") - return unique( - self.find_revisions(tokens[:i]) - + self.find_revisions(tokens[i + 1 :]) - ) - elif "&" in tokens: - i = tokens.index("&") - return duplicates( - self.find_revisions(tokens[:i]), - self.find_revisions(tokens[i + 1 :]), - ) - elif ".." in tokens: - _a, _b = list_split(tokens, "..") - assert len(_a) <= 1 and len(_b) <= 1 - a = self.index_by_token(list_get(_a, 0, "first")) - b = self.index_by_token(list_get(_b, 0, "last")) - return self.history[ceil(min(a, b)) : floor(max(a, b)) + 1] - assert len(tokens) == 1 - token = tokens[0] - if isinstance(token, Revision): - return [token] - elif isinstance(token, list): - return token - if token.startswith("server:"): - server = token.removeprefix("server:") - return [r for r in self.history if server == r.server] - elif token.startswith("tag:"): - tag = token.removeprefix("tag:") - return [r for r in self.history if tag in r.tags] - elif token.startswith("trust:"): - trust = Trust(token.removeprefix("trust:").lower()) - return [r for r in self.history if trust == r.trust] - elif token == "all": - return self.history[:] - elif token == "clean": - return self.clean_history - elif token == "local": - return self.find_revisions("server:") - elif token == "remote": - return self.find_revisions("not(server:)") - else: - return [self.find(token)] - - def index_by_token(self, spec: str | Revision | List[Revision]) -> float: - assert not isinstance( - spec, list - ), "can only index a single revision specifier" - if isinstance(spec, str): - return self.index_by_date(spec) or self.history.index( - self.find(spec) - ) - else: - return self.history.index(spec) - - def index_by_date(self, spec: str) -> Optional[float]: - """Return index of revision matched by datetime. - Index may be fractional if there is no exact datetime match. - Index range: [-0.5, len+0.5] - """ - try: - date = datetime.datetime.fromisoformat(spec) - date = date.replace(tzinfo=date.tzinfo or tzlocal.get_localzone()) - l = list_get( - [i for i, r in enumerate(self.history) if r.timestamp <= date], - -1, - -1, - ) - r = list_get( - [i for i, r in enumerate(self.history) if r.timestamp >= date], - 0, - len(self.history), - ) - print(spec, l, r) - assert ( - 0 <= r - l <= 1 - ), "can not index with date if multiple revision have the same timestamp" - return (l + r) / 2.0 - except ValueError: - return None - - def find_by_number(self, _spec: str) -> Revision: - """Returns revision by relative number. - - 0 is the newest, - 1 is the next older, - 2 is the even next older, - and so on ... - - Raises IndexError or ValueError if no revision is found. 
- """ - spec = int(_spec) - if spec < 0: - raise KeyError("Integer revisions must be positive") - return self.history[-spec - 1] - - def find_by_tag(self, spec: str) -> Revision: - """Returns the latest revision matching a given tag. - - Raises IndexError or ValueError if no revision is found. - """ - if spec in ["last", "latest"]: - return self.history[-1] - if spec == "first": - return self.history[0] - raise ValueError() - - def find_by_uuid(self, spec: str) -> Revision: - """Returns revision matched by UUID. - - Raises IndexError if no revision is found. - """ - try: - return self._by_uuid[spec] - except KeyError: - raise IndexError() - - def find_by_function(self, spec: str) -> Revision: - m = re.fullmatch(r"(\w+)\(.+\)", spec) - if m and m.group(1) in ["first", "last"]: - return self.find_revisions(m.group(0))[0] - raise ValueError() - - def find(self, spec: str) -> Revision: - """Flexible revision search. - - Locates a revision by relative number, by tag, or by uuid. - - """ - spec = spec.strip() - if spec == "" or not self.history: - raise KeyError(spec) - - for find in ( - self.find_by_number, - self.find_by_uuid, - self.find_by_tag, - self.find_by_function, - ): - try: - return find(spec) - except (ValueError, IndexError): - pass - self.log.warning("find-rev-not-found", spec=spec) - raise KeyError(spec) - - ################### - # Syncing Revisions - - @locked(target=".backup", mode="exclusive") - async def push_metadata(self, peers, taskid: str) -> int: - grouped = defaultdict(list) - for r in self.clean_history: - if r.pending_changes: - grouped[r.server].append(r) - self.log.info( - "push-start", changes=sum(len(l) for l in grouped.values()) - ) - async with APIClientManager(peers, taskid, self.log) as apis: - errors = await asyncio.gather( - *[ - self._push_metadata(apis[server], grouped[server]) - for server in apis - ] - ) - self.log.info("push-end", errors=sum(errors)) - return sum(errors) - - async def _push_metadata( - self, api: APIClient, revs: List[Revision] - ) -> bool: - purge_required = False - error = False - for r in revs: - log = self.log.bind( - server=r.server, - rev_uuid=r.uuid, - ) - log.debug( - "push-updating-tags", - old_tags=r.orig_tags, - new_tags=r.tags, - ) - try: - await api.put_tags(r, autoremove=True) - if r.tags: - r.orig_tags = r.tags - r.write_info() - else: - r.remove(force=True) - purge_required = True - except ClientResponseError: - log.warning("push-client-error", exc_style="short") - error = True - except ClientConnectionError: - log.warning("push-connection-error", exc_style="short") - error = True - except ClientError: - log.exception("push-error") - error = True - - if purge_required: - log = self.log.bind(server=api.server_name) - log.debug("push-purging-remote") - try: - await api.run_purge(self.name) - except ClientResponseError: - log.warning("push-purge-client-error", exc_style="short") - error = True - except ClientConnectionError: - log.warning("push-purge-connection-error", exc_style="short") - error = True - except ClientError: - log.error("push-purge-error") - error = True - return error - - @locked(target=".backup", mode="exclusive") - async def pull_metadata(self, peers: dict, taskid: str) -> int: - async def remove_dead_peer(): - for r in list(self.history): - if r.server and r.server not in peers: - self.log.info( - "pull-removing-dead-peer", - rev_uuid=r.uuid, - server=r.server, - ) - r.remove(force=True) - return False - - self.log.info("pull-start") - async with APIClientManager(peers, taskid, self.log) as apis: - errors 
= await asyncio.gather( - remove_dead_peer(), - *[self._pull_metadata(apis[server]) for server in apis], - ) - self.log.info("pull-end", errors=sum(errors)) - return sum(errors) - - async def _pull_metadata(self, api: APIClient) -> bool: - error = False - log = self.log.bind(server=api.server_name) - try: - await api.touch_backup(self.name) - remote_revs = await api.get_revs(self) - log.debug("pull-found-revs", revs=len(remote_revs)) - except ClientResponseError as e: - if e.status in [ - HTTPNotFound.status_code, - HTTPForbidden.status_code, - ]: - log.debug("pull-not-found") - else: - log.warning("pull-client-error", exc_style="short") - error = True - remote_revs = [] - except ClientConnectionError: - log.warning("pull-connection-error", exc_style="short") - return True - except ClientError: - log.exception("pull-error") - error = True - remote_revs = [] - - local_uuids = { - r.uuid for r in self.history if r.server == api.server_name - } - remote_uuids = {r.uuid for r in remote_revs} - for uuid in local_uuids - remote_uuids: - log.warning("pull-removing-unknown-rev", rev_uuid=uuid) - self.find_by_uuid(uuid).remove(force=True) - - for r in remote_revs: - if r.uuid in local_uuids: - if r.to_dict() == self.find_by_uuid(r.uuid).to_dict(): - continue - log.debug("pull-updating-rev", rev_uid=r.uuid) - else: - log.debug("pull-new-rev", rev_uid=r.uuid) - r.write_info() - - return error diff --git a/src/backy/cli/__init__.py b/src/backy/cli/__init__.py new file mode 100644 index 00000000..48c1d72e --- /dev/null +++ b/src/backy/cli/__init__.py @@ -0,0 +1,758 @@ +import argparse +import asyncio +import inspect +import re +import sys +from functools import cached_property +from pathlib import Path +from typing import Any, Dict, List, Literal, Optional + +import humanize +import structlog +import tzlocal +from aiohttp import ClientResponseError +from aiohttp.web_exceptions import HTTPNotFound +from rich import print as rprint +from rich.table import Column, Table +from structlog.stdlib import BoundLogger + +import backy.daemon +import backy.source +from backy import logging +from backy.daemon import BackyDaemon +from backy.daemon.api import Client +from backy.repository import Repository +from backy.revision import Revision, filter_manual_tags +from backy.schedule import Schedule +from backy.source import SOURCE_PLUGINS, CmdLineSource +from backy.utils import BackyJSONEncoder, format_datetime_local, generate_taskid + +# single repo commands + + +# (init) + +# rev-parse (job?, rev) Print full path or uuid of specified revisions + +# log [--filter] (status) (rev) Show backup status. Show inventory and summary information + +# backup [--bg] (job) Perform a backup +# restore Restore (a given revision) to a given target + +# distrust (job, rev) Distrust specified revisions +# verify (job, rev) Verify specified revisions +# rm (job, rev) Forget specified revision +# tag (job, rev) Modify tags on revision + +# gc [--expire] [--remote|--local] (job) (Expire revisions) and collect garbage from the repository. + +# pull? (job) update metadata from all known remotes that host backups +# for the same backup source + + +# reports list/show/delete + + +# # multi-repo / daemon-based commands + +# check (job) +# show-jobs (job def: all) List status of all known jobs (integrated with log?) +# show-daemon Daemon status +# reload + +# maybe add a common --repo/--job flag? 
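A note on the source plugin layer the subcommands above rely on: source implementations are registered as `backy.sources` entry points in pyproject.toml (file, rbd, s3) and surface in this module via SOURCE_PLUGINS. Below is a minimal, illustrative sketch of how such a registry can be enumerated with importlib.metadata; `discover_sources` is a hypothetical helper, not part of backy's API, and it assumes Python 3.10+.

```python
# Illustrative sketch (not backy's actual implementation): list the source
# classes registered under the "backy.sources" entry point group, the same
# group that pyproject.toml declares for FileSource, RBDSource and S3Source.
from importlib.metadata import entry_points  # Python 3.10+ keyword API


def discover_sources() -> dict[str, type]:
    """Map entry point names (e.g. "file", "rbd", "s3") to the loaded classes."""
    return {ep.name: ep.load() for ep in entry_points(group="backy.sources")}


if __name__ == "__main__":
    for name, cls in discover_sources().items():
        print(f"{name}: {cls.__module__}.{cls.__qualname__}")
```

The per-source console scripts registered alongside these entry points (`backy-rbd`, `backy-s3`, `backy-file`) form the second CLI layer that both this command module and the daemon delegate to.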
+ + +class Command(object): + """Proxy between CLI calls and actual backup code.""" + + path: Path + config: Path + dry_run: bool + jobs: Optional[str] + log: BoundLogger + + def __init__( + self, + path: Path, + config: Path, + dry_run: bool, + jobs: Optional[str], + log: BoundLogger, + ): + self.path = path.resolve() + self.config = config + self.dry_run = dry_run + self.jobs = jobs + self.log = log.bind(subsystem="command") + + async def __call__(self, cmdname: str, kwargs: dict[str, Any]): + self.log.debug("call", func=cmdname, func_args=kwargs) + try: + func = getattr(self, cmdname) + params = inspect.signature(func).parameters + ret = 0 + if "repo" in params and params["repo"].annotation == Repository: + for repo in await self.get_repos(): + r = func(repo=repo, **kwargs) + if asyncio.iscoroutine(r): + r = await r + if not isinstance(r, int): + r = 0 + ret = max(ret, r) + elif ( + "repos" in params + and params["repos"].annotation == List[Repository] + ): + ret = func(repos=await self.get_repos(), **kwargs) + if asyncio.iscoroutine(ret): + ret = await ret + if not isinstance(ret, int): + ret = 0 + else: + assert ( + self.jobs is None + ), "This subcommand does not support --jobs/-a" + ret = func(**kwargs) + if asyncio.iscoroutine(ret): + ret = await ret + if not isinstance(ret, int): + ret = 0 + self.log.debug("return-code", code=ret) + return ret + except Exception: + self.log.exception("failed") + return 1 + + @cached_property + def source(self) -> CmdLineSource: + return CmdLineSource.load(self.path, self.log) + + @cached_property + def api(self): + d = BackyDaemon(self.config, self.log) + d._read_config() + taskid = self.log._context.get("taskid", generate_taskid()) + return Client.from_conf("", d.api_cli_default, taskid, self.log) + + async def get_repos(self) -> List[Repository]: + if self.jobs is None: + return [self.source.repository] + else: + jobs = await self.api.get_jobs() + assert len(jobs) > 0, "daemon has no configured job" + reg = re.compile(self.jobs) + res = [ + Repository( + Path(job["path"]), + Schedule.from_dict(job["schedule"]), + self.log, + ) + for job in jobs + if reg.search(job["name"]) + ] + assert len(res) > 0, "--jobs filter did not match" + for r in res: + r.connect() + return res + + # + # def init(self, type): + # sourcefactory = backy.source.factory_by_type(type) + # source = sourcefactory(*sourcefactory.argparse()) + # # TODO: check if repo already exists + # repo = Repository(self.path / "config", source, Schedule(), self.log) + # repo.connect() + # repo.store() + + def rev_parse(self, repo: Repository, revision: str, uuid: bool) -> None: + for rev in repo.find_revisions(revision): + if uuid: + print(rev.uuid) + else: + print(rev.info_filename) + + def log_(self, repo: Repository, json_: bool, revision: str) -> None: + revs = repo.find_revisions(revision) + if json_: + print(BackyJSONEncoder().encode([r.to_dict() for r in revs])) + return + total_bytes = 0 + + tz = tzlocal.get_localzone() + t = Table( + f"Date ({tz})", + "ID", + Column("Size", justify="right"), + Column("Duration", justify="right"), + "Tags", + "Trust", + "Server", + ) + + for r in revs: + total_bytes += r.stats.get("bytes_written", 0) + duration = r.stats.get("duration") + if duration: + duration = humanize.naturaldelta(duration) + else: + duration = "-" + + if r.pending_changes: + added = [f"+[on green]{t}[/]" for t in r.tags - r.orig_tags] + removed = [f"-[on red]{t}[/]" for t in r.orig_tags - r.tags] + same = list(r.orig_tags & r.tags) + tags = ",".join(added + removed + same) + 
else: + tags = ",".join(r.tags) + + t.add_row( + format_datetime_local(r.timestamp)[0], + r.uuid, + humanize.naturalsize( + r.stats.get("bytes_written", 0), binary=True + ), + duration, + tags, + r.trust.value, + f"[underline italic]{r.server}[/]" + if r.pending_changes + else r.server, + ) + + rprint(t) + + print( + "{} revisions containing {} data (estimated)".format( + len(revs), humanize.naturalsize(total_bytes, binary=True) + ) + ) + + async def backup( + self, repos: List[Repository], bg: bool, tags: str, force: bool + ) -> int: + if len(repos) > 1: + bg = True + + if bg: + for repo in repos: + log = self.log.bind(job_name=repo.name) + try: + # TODO support tags + await self.api.run_job(repo.name) + log.info("triggered-run") + except ClientResponseError as e: + if e.status == HTTPNotFound.status_code: + log.error("unknown-job") + return 1 + raise + return 0 + else: + repo = repos[0] + assert ( + self.source.repository.path == repo.path + ), "only the current job is supported without --bg" + repo._clean() + tags_ = set(t.strip() for t in tags.split(",")) + if not force: + repo.validate_tags(tags_) + r = Revision.create(repo, tags_, self.log) + r.materialize() + try: + return self.source.backup(r) + finally: + repo._clean() + + def restore( + self, + revision: str, + **restore_args: Any, + ) -> int: + r = self.source.repository.find(revision) + return self.source.restore( + r, self.source.restore_type.from_args(**restore_args) + ) + + def distrust(self, repo: Repository, revision: str) -> None: + repo.distrust(repo.find_revisions(revision)) + + def verify(self, revision: str) -> int: + # TODO support multiple repos + ret = 0 + for r in self.source.repository.find_revisions(revision): + ret = max(ret, self.source.verify(r)) + return ret + + def rm(self, repo: Repository, revision: str) -> None: + repo.rm(repo.find_revisions(revision)) + + def tags( + self, + repo: Repository, + tag_action: Literal["set", "add", "remove"], + autoremove: bool, + expect: Optional[str], + revision: str, + tags: str, + force: bool, + ) -> int: + tags_ = set(t.strip() for t in tags.split(",")) + if expect is None: + expect_ = None + else: + expect_ = set(t.strip() for t in expect.split(",")) + success = repo.tags( + tag_action, + revision, + tags_, + expect=expect_, + autoremove=autoremove, + force=force, + ) + return int(not success) + + def gc(self, repo: Repository, expire: bool, local: bool) -> None: + if expire and not local: + assert False # request pull from daemon + # XXX needs to update from remote API peers first (pull) + assert self.source.repository.path == repo.path + if expire: + repo.expire() + if expire and not local: + assert False # request push from daemon + self.source.gc() + + def reports_list(self, repo: Repository): + for id in repo.report_ids: + print(id) + + def reports_show(self, repo: Repository, reports: Optional[str]): + if reports is None: + ids = repo.report_ids + else: + ids = reports.split(",") + for id in ids: + path = repo.report_path.joinpath(id).with_suffix(".report") + print(id) + print(path.read_text(encoding="utf-8")) + + def reports_delete(self, repo: Repository, reports: Optional[str]): + log = self.log.bind(job_name=repo.name) + if reports is None: + ids = repo.report_ids + else: + ids = reports.split(",") + for id in ids: + path = repo.report_path.joinpath(id).with_suffix("report") + path.unlink() + log.info("report-deleted", id=id) + + def check(self, repo: Repository): + log = self.log.bind(job_name=repo.name) + exitcode = 0 + + manual_tags = set() + for rev in 
repo.history: + manual_tags |= filter_manual_tags(rev.tags) + if manual_tags: + log.info("check-manual-tags", manual_tags=", ".join(manual_tags)) + + unsynced_revs = {r for r in repo.history if r.pending_changes} + if unsynced_revs: + log.info("check-unsynced-revs", unsynced_revs=len(unsynced_revs)) + + if not repo.sla: + log.critical( + "check-sla-violation", + last_time=str( + repo.clean_history[-1].timestamp + if repo.clean_history + else None + ), + sla_overdue=repo.sla_overdue, + ) + exitcode = max(exitcode, 2) + + if repo.report_ids: + log.warning("check-reports", reports=len(repo.report_ids)) + exitcode = max(exitcode, 1) + + return exitcode + + async def show_jobs(self, repos: List[Repository]): + """List status of all known jobs. Optionally filter by regex.""" + repo_names = [r.name for r in repos] + + tz = format_datetime_local(None)[1] + + t = Table( + "Job", + "SLA", + "SLA overdue", + "Status", + f"Last Backup ({tz})", + "Last Tags", + Column("Last Duration", justify="right"), + f"Next Backup ({tz})", + "Next Tags", + ) + + jobs = await self.api.fetch_status(self.jobs) + jobs.sort(key=lambda j: j["job"]) + for job in jobs: + if job["job"] not in repo_names: + continue + overdue = ( + humanize.naturaldelta(job["sla_overdue"]) + if job["sla_overdue"] + else "-" + ) + last_duration = ( + humanize.naturaldelta(job["last_duration"]) + if job["last_duration"] + else "-" + ) + last_time = format_datetime_local(job["last_time"])[0] + next_time = format_datetime_local(job["next_time"])[0] + + t.add_row( + job["job"], + job["sla"], + overdue, + job["status"], + last_time, + job["last_tags"], + last_duration, + next_time, + job["next_tags"], + ) + backups = await self.api.list_backups() + if self.jobs: + backups = list(filter(re.compile(self.jobs).search, backups)) + for b in backups: + t.add_row(b, "-", "-", "Dead", "-", "", "-", "-", "") + + rprint(t) + print("{} jobs shown".format(len(jobs) + len(backups))) + + async def show_daemon(self): + """Show job status overview""" + t = Table("Status", "#") + state_summary: Dict[str, int] = {} + jobs = await self.api.get_jobs() + jobs += [{"status": "Dead"} for _ in await self.api.list_backups()] + for job in jobs: + state_summary.setdefault(job["status"], 0) + state_summary[job["status"]] += 1 + + for state in sorted(state_summary): + t.add_row(state, str(state_summary[state])) + rprint(t) + + async def reload_daemon(self): + """Reload the configuration.""" + await self.api.reload_daemon() + + +def main(): + parser = argparse.ArgumentParser( + description="Backy command line client.", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose output" + ) + parser.add_argument( + "-c", + "--config", + type=Path, + default="/etc/backy.conf", + help="(default: %(default)s)", + ) + + parser.add_argument( + "-C", + dest="workdir", + default=".", + type=Path, + help=( + "Run as if backy was started in instead of the current " + "working directory." + ), + ) + + parser.add_argument( + "-n", "--dry-run", action="store_true", help="Do not modify state." + ) + job_filter = parser.add_mutually_exclusive_group() + job_filter.add_argument( + "--jobs", + dest="jobs", + metavar="", + help="Optional job filter regex. 
Defaults to current workdir", + ) + + job_filter.add_argument( + "-a", + "--all", + action="store_const", + const=".*", + dest="jobs", + help="Shortcut to select all jobs", + ) + + subparsers = parser.add_subparsers() + + # TODO + # INIT + p = subparsers.add_parser("init", help="Create an empty backy repository.") + p.add_argument( + "type", + choices=backy.source.SOURCE_PLUGINS.names, + help="Type of the source.", + ) + p.set_defaults(func="init") + + # REV-PARSE + p = subparsers.add_parser( + "rev-parse", + help="Print full path or uuid of specified revisions", + ) + p.add_argument( + "--uuid", + action="store_true", + help="Print uuid instead of full path", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="all", + help="use revision SPEC to find (default: %(default)s)", + ) + p.set_defaults(func="rev_parse") + + # LOG + p = subparsers.add_parser( + "log", + help="Show backup status. Show inventory and summary information", + ) + p.add_argument("--json", dest="json_", action="store_true") + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="all", + help="use revision SPEC as filter (default: %(default)s)", + ) + p.set_defaults(func="log_") + + # BACKUP + p = subparsers.add_parser( + "backup", + help="Perform a backup", + ) + p.add_argument( + "-f", "--force", action="store_true", help="Do not validate tags" + ) + p.add_argument( + "--bg", + action="store_true", + help="Let the daemon run the backup job. Implied if if more than one job is selected.", + ) + p.add_argument("tags", help="Tags to apply to the backup") + p.set_defaults(func="backup") + + # RESTORE + p = subparsers.add_parser( + "restore", + help="Restore (a given revision) to a given target. The arguments vary for the different repo types.", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="latest", + help="use revision SPEC as restore source (default: %(default)s)", + ) + restore_subparsers = p.add_subparsers() + for source_type in SOURCE_PLUGINS: + source = source_type.load() + source.restore_type.setup_argparse( + restore_subparsers.add_parser(source.type_) + ) + p.set_defaults(func="restore") + + # DISTRUST + p = subparsers.add_parser( + "distrust", + help="Distrust specified revisions", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="local", + help="use revision SPEC to distrust (default: %(default)s)", + ) + p.set_defaults(func="distrust") + + # VERIFY + p = subparsers.add_parser( + "verify", + help="Verify specified revisions", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="trust:distrusted&local", + help="use revision SPEC to verify (default: %(default)s)", + ) + p.set_defaults(func="verify") + + # RM + p = subparsers.add_parser( + "rm", + help="Remove specified revision", + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + required=True, + help="use revision SPEC to remove", + ) + p.set_defaults(func="rm") + + # TAGS + p = subparsers.add_parser( + "tags", + help="Modify tags on revision", + ) + p.add_argument( + "--autoremove", + action="store_true", + help="Remove revision if no tags remain", + ) + p.add_argument( + "-f", "--force", action="store_true", help="Do not validate tags" + ) + p.add_argument( + "--expect", + metavar="", + help="Do nothing if tags differ from the expected tags", + ) + p.add_argument( + "tag_action", + choices=["set", "add", "remove"], + ) + p.add_argument( + "-r", + "--revision", + metavar="SPEC", + default="all", + help="modify tags for revision SPEC, 
modifies all if not given " + "(default: %(default)s)", + ) + p.add_argument( + "tags", + metavar="", + help="comma separated list of tags", + ) + p.set_defaults(func="tags") + + # GC + p = subparsers.add_parser( + "gc", + help="Purge the backup store (i.e. chunked) from unused data", + ) + p.add_argument( + "--expire", + action="store_true", + help="Expire tags according to schedule", + ) + p.add_argument( + "--local", + action="store_true", + help="Do not expire on remote servers", + ) + p.set_defaults(func="gc") + + # REPORTS-LIST + p = subparsers.add_parser("reports-list", help="List problem reports") + p.set_defaults(func="reports_list") + + # REPORTS-SHOW + p = subparsers.add_parser("reports-show", help="Show problem report") + p.add_argument( + "reports", + nargs="?", + metavar="", + help="comma separated list of report uuids", + ) + + p.set_defaults(func="reports_show") + + # REPORTS-DELETE + p = subparsers.add_parser("reports-delete", help="Delete problem report") + report_sel = p.add_mutually_exclusive_group(required=True) + report_sel.add_argument( + "reports", + nargs="?", + metavar="", + help="comma separated list of report uuids", + ) + report_sel.add_argument( + "--all-reports", + action="store_const", + const=None, + dest="reports", + help="Select all reports", + ) + p.set_defaults(func="reports_delete") + + # CHECK + p = subparsers.add_parser( + "check", + help="Check whether the selected jobs adhere to their schedules' SLA", + ) + p.set_defaults(func="check") + + # TODO: job filter default + # SHOW JOBS + p = subparsers.add_parser("show-jobs", help="List status of all known jobs") + p.set_defaults(func="show_jobs") + + # SHOW DAEMON + p = subparsers.add_parser("show-daemon", help="Show job status overview") + p.set_defaults(func="show_daemon") + + # RELOAD DAEMON + p = subparsers.add_parser("reload-daemon", help="Reload daemon config") + p.set_defaults(func="reload_daemon") + + args = parser.parse_args() + + if not hasattr(args, "func"): + parser.print_usage() + sys.exit(0) + + # Logging + + logging.init_logging(args.verbose, defaults={"taskid": generate_taskid()}) + log = structlog.stdlib.get_logger(subsystem="command") + log.debug("invoked", args=" ".join(sys.argv)) + + command = Command(args.workdir, args.config, args.dry_run, args.jobs, log) + func = args.func + + # Pass over to function + func_args = dict(args._get_kwargs()) + del func_args["func"] + del func_args["verbose"] + del func_args["workdir"] + del func_args["config"] + del func_args["dry_run"] + del func_args["jobs"] + + sys.exit(asyncio.run(command(func, func_args))) diff --git a/src/backy/cli/tests/__init__.py b/src/backy/cli/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/tests/test_client.py b/src/backy/cli/tests/test_client.py similarity index 63% rename from src/backy/tests/test_client.py rename to src/backy/cli/tests/test_client.py index b96451b3..6dd8dcae 100644 --- a/src/backy/tests/test_client.py +++ b/src/backy/cli/tests/test_client.py @@ -6,18 +6,21 @@ from aiohttp.web_exceptions import HTTPUnauthorized from backy import utils -from backy.api import BackyAPI -from backy.client import APIClient, CLIClient +from backy.cli import Command +from backy.daemon.api import BackyAPI, Client +from backy.report import ChunkMismatchReport from backy.revision import Revision from backy.tests import Ellipsis -from ..quarantine import QuarantineReport -from .test_daemon import daemon - @pytest.fixture -def log(log): - return log.bind(job_name="-") +async def 
daemon(tmp_path, monkeypatch, log): + # FIXME + from backy.daemon.tests.test_daemon import daemon + + gen = daemon.__pytest_wrapped__.obj(tmp_path, monkeypatch, log) + async for i in gen: + yield i @pytest.fixture @@ -62,21 +65,22 @@ async def api_client(api, aiohttp_client, log): headers={hdrs.AUTHORIZATION: "Bearer testtoken", "taskid": "ABCD"}, raise_for_status=True, ) - api_client = APIClient( - "", "http://localhost:0", "token", "task", log - ) + api_client = Client("", "http://localhost:0", "token", "task", log) await api_client.session.close() api_client.session = client return api_client @pytest.fixture -async def cli_client(api_client, log): - return CLIClient(api_client, log) +async def command(tmp_path, api_client, log): + cmd = Command(tmp_path, tmp_path / "config", False, ".*", log) + cmd.api = api_client + return cmd -async def test_cli_jobs(cli_client, capsys): - await cli_client.jobs() +async def test_show_jobs(command, capsys): + exitcode = await command("show_jobs", {}) + assert exitcode == 0 out, err = capsys.readouterr() assert ( Ellipsis( @@ -100,7 +104,9 @@ async def test_cli_jobs(cli_client, capsys): == out ) - await cli_client.jobs(filter_re="test01") + command.jobs = "test01" + exitcode = await command("show_jobs", {}) + assert exitcode == 0 out, err = capsys.readouterr() assert ( Ellipsis( @@ -120,26 +126,15 @@ async def test_cli_jobs(cli_client, capsys): == out ) - await cli_client.jobs(filter_re="asdf") - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -┏━━━━━┳━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ -┃ ┃ ┃ ┃ ┃ Last ┃ ┃ ┃ Next ┃ ┃ -┃ ┃ ┃ SLA ┃ ┃ Backup ┃ Last ┃ Last ┃ Backup ┃ Next ┃ -┃ Job ┃ SLA ┃ overdue ┃ Status ┃ ... ┃ Tags ┃ Durat… ┃ ... ┃ Tags ┃ -┡━━━━━╇━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ -└─────┴─────┴─────────┴────────┴─────────┴─────────┴────────┴─────────┴────────┘ -0 jobs shown -""" - ) - == out - ) + command.jobs = "asdf" + exitcode = await command("show_jobs", {}) + assert exitcode == 1 -async def test_cli_status(cli_client, capsys): - await cli_client.status() +async def test_show_daemon(command, capsys): + command.jobs = None + exitcode = await command("show_daemon", {}) + assert exitcode == 0 out, err = capsys.readouterr() assert ( """\ @@ -154,193 +149,204 @@ async def test_cli_status(cli_client, capsys): ) -async def test_cli_run(daemon, cli_client, monkeypatch): +async def test_backup_bg(daemon, command, monkeypatch): utils.log_data = "" run = mock.Mock() monkeypatch.setattr(daemon.jobs["test01"].run_immediately, "set", run) - await cli_client.run("test01") + command.jobs = "test01" + exitcode = await command( + "backup", {"bg": True, "tags": "manual:a", "force": False} + ) + assert exitcode == 0 run.assert_called_once() assert ( Ellipsis( """\ +... D - command/call func='backup' func_args={'bg': True, 'tags': 'manual:a', 'force': False} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ +... D ~cli[ABCD] api/request-result response=... status_code=200 +... D test01 repo/scan-reports entries=0 ... D ~[ABCD] api/new-conn path='/v1/jobs/test01/run' query='' ... I ~cli[ABCD] api/get-job name='test01' ... I ~cli[ABCD] api/run-job name='test01' ... D ~cli[ABCD] api/request-result status_code=202 -... I - CLIClient/triggered-run job='test01' +... I test01 command/triggered-run \n\ +... 
D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_run_missing(daemon, cli_client): +async def test_backup_bg_missing(daemon, command): utils.log_data = "" - try: - await cli_client.run("aaaa") - except SystemExit as e: - assert e.code == 1 - - assert ( - Ellipsis( - """\ -... D ~[ABCD] api/new-conn path='/v1/jobs/aaaa/run' query='' -... I ~cli[ABCD] api/get-job name='aaaa' -... I ~cli[ABCD] api/get-job-not-found name='aaaa' -... D ~cli[ABCD] api/request-result status_code=404 -... E - CLIClient/unknown-job job='aaaa' -""" - ) - == utils.log_data + command.jobs = "aaaa" + exitcode = await command( + "backup", {"bg": True, "tags": "manual:a", "force": False} ) + assert exitcode == 1 -async def test_cli_runall(daemon, cli_client, monkeypatch): +async def test_backup_bg_all(daemon, command, monkeypatch): utils.log_data = "" run1 = mock.Mock() run2 = mock.Mock() monkeypatch.setattr(daemon.jobs["test01"].run_immediately, "set", run1) monkeypatch.setattr(daemon.jobs["foo00"].run_immediately, "set", run2) - await cli_client.runall() + exitcode = await command( + "backup", {"bg": True, "tags": "manual:a", "force": False} + ) + assert exitcode == 0 run1.assert_called_once() run2.assert_called_once() assert ( Ellipsis( """\ +... D - command/call func='backup' func_args={'bg': True, 'tags': 'manual:a', 'force': False} ... D ~[ABCD] api/new-conn path='/v1/jobs' query='' ... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 ... D ~[ABCD] api/new-conn path='/v1/jobs/test01/run' query='' ... I ~cli[ABCD] api/get-job name='test01' ... I ~cli[ABCD] api/run-job name='test01' ... D ~cli[ABCD] api/request-result status_code=202 -... I - CLIClient/triggered-run job='test01' +... I test01 command/triggered-run \n\ ... D ~[ABCD] api/new-conn path='/v1/jobs/foo00/run' query='' ... I ~cli[ABCD] api/get-job name='foo00' ... I ~cli[ABCD] api/run-job name='foo00' ... D ~cli[ABCD] api/request-result status_code=202 -... I - CLIClient/triggered-run job='foo00' +... I foo00 command/triggered-run \n\ +... D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_reload(daemon, cli_client, monkeypatch): +async def test_reload(daemon, command, monkeypatch): utils.log_data = "" reload = mock.Mock() monkeypatch.setattr(daemon, "reload", reload) - await cli_client.reload() + command.jobs = None + exitcode = await command("reload_daemon", {}) + assert exitcode == 0 reload.assert_called_once() assert ( Ellipsis( """\ -... I - CLIClient/reloading-daemon \n\ +... D - command/call func='reload_daemon' func_args={} ... D ~[ABCD] api/new-conn path='/v1/reload' query='' ... I ~cli[ABCD] api/reload-daemon \n\ ... D ~cli[ABCD] api/request-result status_code=204 -... I - CLIClient/reloaded-daemon \n\ +... D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_check_ok(daemon, cli_client): +async def test_check_ok(daemon, command): utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 0 + exitcode = await command("check", {}) + assert exitcode == 0 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... 
I - CLIClient/check-exit exitcode=0 jobs=2 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 +... D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_check_too_old(daemon, clock, cli_client, log): +async def test_check_too_old(daemon, clock, command, log): job = daemon.jobs["test01"] - revision = Revision.create(job.backup, set(), log) + revision = Revision.create(job.repository, set(), log) revision.timestamp = utils.now() - datetime.timedelta(hours=48) revision.stats["duration"] = 60.0 revision.materialize() utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 2 + exitcode = await command("check", {}) + assert exitcode == 2 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... C test01 CLIClient/check-sla-violation last_time='2015-08-30 07:06:47+00:00' sla_overdue=172800.0 -... I - CLIClient/check-exit exitcode=2 jobs=2 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 +... C test01 command/check-sla-violation last_time='2015-08-30 07:06:47+00:00' sla_overdue=172800.0 +... D - command/return-code code=2 """ ) == utils.log_data ) -async def test_cli_check_manual_tags(daemon, cli_client, log): +async def test_check_manual_tags(daemon, command, log): job = daemon.jobs["test01"] - revision = Revision.create(job.backup, {"manual:test"}, log) + revision = Revision.create(job.repository, {"manual:test"}, log) revision.stats["duration"] = 60.0 revision.materialize() utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 0 + exitcode = await command("check", {}) + assert exitcode == 0 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... I test01 CLIClient/check-manual-tags manual_tags='manual:test' -... I - CLIClient/check-exit exitcode=0 jobs=2 +... D test01 repo/scan-reports entries=0 +... D foo00 repo/scan-reports entries=0 +... I test01 command/check-manual-tags manual_tags='manual:test' +... D - command/return-code code=0 """ ) == utils.log_data ) -async def test_cli_check_quarantine(daemon, cli_client, log): +async def test_check_quarantine(daemon, command, log): job = daemon.jobs["test01"] - job.backup.quarantine.add_report(QuarantineReport(b"a", b"b", 0)) + job.repository.add_report(ChunkMismatchReport(b"a", b"b", 0)) utils.log_data = "" - try: - await cli_client.check() - except SystemExit as e: - assert e.code == 1 + exitcode = await command("check", {}) + assert exitcode == 1 assert ( Ellipsis( """\ -... D ~[ABCD] api/new-conn path='/v1/status' query='filter=' -... I ~cli[ABCD] api/get-status filter='' +... D - command/call func='check' func_args={} +... D ~[ABCD] api/new-conn path='/v1/jobs' query='' +... I ~cli[ABCD] api/get-jobs \n\ ... D ~cli[ABCD] api/request-result response=... status_code=200 -... W test01 CLIClient/check-quarantined reports=1 -... I - CLIClient/check-exit exitcode=1 jobs=2 +... D test01 repo/scan-reports entries=1 +... 
D foo00 repo/scan-reports entries=0 +... W test01 command/check-reports reports=1 +... D - command/return-code code=1 """ ) == utils.log_data diff --git a/src/backy/cli/tests/test_main.py b/src/backy/cli/tests/test_main.py new file mode 100644 index 00000000..0e365b07 --- /dev/null +++ b/src/backy/cli/tests/test_main.py @@ -0,0 +1,417 @@ +import datetime +import os +from dataclasses import dataclass +from pathlib import Path +from unittest.mock import create_autospec + +import pytest + +import backy.cli +import backy.repository +import backy.source +from backy import utils +from backy.repository import Repository +from backy.revision import Revision +from backy.tests import Ellipsis + + +def test_display_usage(capsys, argv): + with pytest.raises(SystemExit) as exit: + backy.cli.main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + """\ +usage: pytest [-h] [-v] [-c CONFIG] [-C WORKDIR] [-n] + [--jobs | -a] + {init,rev-parse,log,backup,restore,distrust,verify,rm,tags,gc,reports-list,reports-show,reports-delete,check,show-jobs,show-daemon,reload-daemon} + ... +""" + == out + ) + assert err == "" + + +def test_display_help(capsys, argv): + argv.append("--help") + with pytest.raises(SystemExit) as exit: + backy.cli.main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + Ellipsis( + """\ +usage: pytest [-h] [-v] [-c CONFIG] [-C WORKDIR] [-n] + [--jobs | -a] + {init,rev-parse,log,backup,restore,distrust,verify,rm,tags,gc,reports-list,reports-show,reports-delete,check,show-jobs,show-daemon,reload-daemon} + ... + +Backy command line client. + +positional arguments: +... +""" + ) + == out + ) + assert err == "" + + +@dataclass +class Instance: + cls: type + + def __eq__(self, other): + return isinstance(other, self.cls) + + +@pytest.mark.parametrize( + ["fun", "args", "rv", "rc", "params"], + [ + ( + "rev_parse", + ["rev-parse", "-r", "1"], + None, + 0, + {"repo": Instance(Repository), "revision": "1", "uuid": False}, + ), + ( + "log_", + ["log"], + None, + 0, + {"repo": Instance(Repository), "json_": False, "revision": "all"}, + ), + ( + "backup", + ["backup", "manual:test"], + 0, + 0, + { + "repos": [Instance(Repository)], + "bg": False, + "tags": "manual:test", + "force": False, + }, + ), + ( + "backup", + ["backup", "--force", "--bg", "manual:test"], + 1, + 1, + { + "repos": [Instance(Repository)], + "bg": True, + "tags": "manual:test", + "force": True, + }, + ), + ( + "restore", + ["restore", "-r", "1", "file", "out.bin"], + None, + 0, + {"revision": "1", "target": Path("out.bin")}, + ), + ( + "distrust", + ["distrust", "-r", "1"], + None, + 0, + {"repo": Instance(Repository), "revision": "1"}, + ), + ( + "verify", + ["verify", "-r", "1"], + None, + 0, + {"revision": "1"}, + ), + ( + "rm", + ["rm", "-r", "1"], + None, + 0, + {"repo": Instance(Repository), "revision": "1"}, + ), + ( + "tags", + ["tags", "set", "-r", "last", "manual:a"], + None, + 0, + { + "repo": Instance(Repository), + "tag_action": "set", + "autoremove": False, + "expect": None, + "revision": "last", + "tags": "manual:a", + "force": False, + }, + ), + ( + "tags", + [ + "tags", + "remove", + "-r", + "last", + "--autoremove", + "--expect", + "manual:b", + "manual:a", + ], + None, + 0, + { + "repo": Instance(Repository), + "tag_action": "remove", + "autoremove": True, + "expect": "manual:b", + "revision": "last", + "tags": "manual:a", + "force": False, + }, + ), + ( + "tags", + ["tags", "add", "-r", "last", "--force", "manual:a"], + None, + 0, + { + "repo": 
Instance(Repository), + "tag_action": "add", + "autoremove": False, + "expect": None, + "revision": "last", + "tags": "manual:a", + "force": True, + }, + ), + ( + "gc", + ["gc", "--expire"], + None, + 0, + {"repo": Instance(Repository), "expire": True, "local": False}, + ), + ( + "reports_list", + ["reports-list"], + None, + 0, + {"repo": Instance(Repository)}, + ), + ( + "reports_show", + ["reports-show"], + None, + 0, + {"repo": Instance(Repository), "reports": None}, + ), + ( + "reports_delete", + ["reports-delete", "--all-reports"], + None, + 0, + {"repo": Instance(Repository), "reports": None}, + ), + ( + "check", + ["check"], + None, + 0, + {"repo": Instance(Repository)}, + ), + ( + "show_jobs", + ["show-jobs"], + None, + 0, + {"repos": [Instance(Repository)]}, + ), + ( + "show_daemon", + ["show-daemon"], + None, + 0, + {}, + ), + ( + "reload_daemon", + ["reload-daemon"], + None, + 0, + {}, + ), + ], +) +def test_call_fun( + fun, + args, + rv, + rc, + params, + argv, + tmp_path, + monkeypatch, + log, +): + path = tmp_path / "test00" + path.mkdir() + os.chdir(path) + + with open(path / "config", "w", encoding="utf-8") as f: + f.write( + f""" +--- +path: "{path}" +schedule: + daily: + interval: 1d + keep: 7 +source: + type: file + filename: {__file__} +""" + ) + + mock = create_autospec(getattr(backy.cli.Command, fun), return_value=rv) + monkeypatch.setattr(backy.cli.Command, fun, mock) + argv.extend(["-v", "-C", str(path), *args]) + utils.log_data = "" + with pytest.raises(SystemExit) as exit: + backy.cli.main() + assert exit.value.code == rc + mock.assert_called_once() + assert (Instance(backy.cli.Command),) == mock.call_args.args + assert params == mock.call_args.kwargs + + expected = "" + expected += f"... D - command/invoked args='... -v -C ... {' '.join(args)}'\n" + expected += f"... D - command/call func='{fun}' func_args=...\n" + if "repo" in params or "repos" in params: + expected += "... D test00 repo/scan-reports entries=0\n" + expected += f"... D - command/return-code code={rc}\n" + + assert Ellipsis(expected) == utils.log_data + + +def test_call_unexpected_exception( + capsys, repository, argv, monkeypatch, log, tmp_path +): + def do_raise(*args, **kw): + raise RuntimeError("test") + + monkeypatch.setattr(backy.cli.Command, "log_", do_raise) + import os + + monkeypatch.setattr(os, "_exit", lambda x: None) + + argv.extend( + [ + "-v", + "-C", + str(repository.path), + "log", + ] + ) + utils.log_data = "" + with pytest.raises(SystemExit): + backy.cli.main() + out, err = capsys.readouterr() + assert "" == out + assert ( + Ellipsis( + """\ +... D - command/invoked args='... -v -C ... log' +... D - command/call func='log_' func_args=... +... 
E - command/failed exception_class='builtins.RuntimeError' exception_msg='test' +exception>\tTraceback (most recent call last): +exception>\t File ".../src/backy/cli/__init__.py", line ..., in __call__ +exception>\t ret = func(**kwargs) +exception>\t File ".../src/backy/cli/tests/test_main.py", line ..., in do_raise +exception>\t raise RuntimeError("test") +exception>\tRuntimeError: test +""" + ) + == utils.log_data + ) + + +def test_commands_wrapper_status( + repository, tmp_path, capsys, clock, tz_berlin, log +): + commands = backy.cli.Command( + tmp_path, tmp_path / "config", False, ".*", log + ) + + revision1 = Revision.create(repository, {"daily"}, log, uuid="1") + revision1.materialize() + + revision2 = Revision.create(repository, {"daily"}, log, uuid="2") + revision2.timestamp = backy.utils.now() + datetime.timedelta(hours=1) + revision2.server = "remote" + revision2.orig_tags = {"daily"} + revision2.materialize() + + revision3 = Revision.create(repository, {"new", "same"}, log, uuid="3") + revision3.timestamp = backy.utils.now() + datetime.timedelta(hours=2) + revision3.server = "remote" + revision3.orig_tags = {"old", "same"} + revision3.materialize() + + repository.connect() + commands.log_(repository, json_=False, revision="all") + out, err = capsys.readouterr() + + assert err == "" + assert out == Ellipsis( + """\ +┏━━━━━━━━━━━━━━━━┳━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ +┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ +┃ (Europe/Berli… ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ +┡━━━━━━━━━━━━━━━━╇━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ +│ 2015-09-01 │ 1 │ 0 Bytes │ - │ daily │ trusted │ │ +│ 09:06:47 │ │ │ │ │ │ │ +│ 2015-09-01 │ 2 │ 0 Bytes │ - │ daily │ trusted │ remote │ +│ 10:06:47 │ │ │ │ │ │ │ +│ 2015-09-01 │ 3 │ 0 Bytes │ - │ +new,-old,same │ trusted │ remote │ +│ 11:06:47 │ │ │ │ │ │ │ +└────────────────┴────┴─────────┴──────────┴────────────────┴─────────┴────────┘ +3 revisions containing 0 Bytes data (estimated) +""" + ) + + +def test_commands_wrapper_status_json( + repository, tmp_path, capsys, clock, tz_berlin, log +): + commands = backy.cli.Command( + tmp_path, tmp_path / "config", False, ".*", log + ) + + revision = Revision.create(repository, set(), log, uuid="1") + revision.stats["duration"] = 3.5 + revision.stats["bytes_written"] = 42 + revision.materialize() + + repository.connect() + commands.log_(repository, json_=True, revision="all") + out, err = capsys.readouterr() + + assert err == "" + assert ( + out + == """\ +[{\ +"uuid": "1", \ +"timestamp": "2015-09-01T07:06:47+00:00", \ +"parent": "", "stats": {"bytes_written": 42, "duration": 3.5}, \ +"trust": "trusted", \ +"tags": [], \ +"orig_tags": [], \ +"server": ""\ +}] +""" + ) diff --git a/src/backy/client.py b/src/backy/client.py deleted file mode 100644 index c16e9a68..00000000 --- a/src/backy/client.py +++ /dev/null @@ -1,315 +0,0 @@ -import datetime -import re -import sys -from asyncio import get_running_loop -from typing import TYPE_CHECKING, Dict, Iterator, List - -import aiohttp -import humanize -from aiohttp import ClientResponseError, ClientTimeout, TCPConnector, hdrs -from aiohttp.web_exceptions import HTTPNotFound -from rich import print as rprint -from rich.table import Column, Table -from structlog.stdlib import BoundLogger - -import backy.backup -from backy.revision import Revision -from backy.utils import format_datetime_local - -if TYPE_CHECKING: - from backy.daemon import BackyDaemon - - -class APIClientManager: - connector: TCPConnector - peers: dict[str, dict] 
- clients: dict[str, "APIClient"] - taskid: str - log: BoundLogger - - def __init__(self, peers: Dict[str, dict], taskid: str, log: BoundLogger): - self.connector = TCPConnector() - self.peers = peers - self.clients = dict() - self.taskid = taskid - self.log = log.bind(subsystem="APIClientManager") - - def __getitem__(self, name: str) -> "APIClient": - if name and name not in self.clients: - self.clients[name] = APIClient.from_conf( - name, self.peers[name], self.taskid, self.log, self.connector - ) - return self.clients[name] - - def __iter__(self) -> Iterator[str]: - return iter(self.peers) - - async def close(self) -> None: - for c in self.clients.values(): - await c.close() - await self.connector.close() - - async def __aenter__(self) -> "APIClientManager": - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close() - - -class APIClient: - log: BoundLogger - server_name: str - session: aiohttp.ClientSession - - def __init__( - self, - server_name: str, - url: str, - token: str, - taskid: str, - log, - connector=None, - ): - assert get_running_loop().is_running() - self.log = log.bind(subsystem="APIClient") - self.server_name = server_name - self.session = aiohttp.ClientSession( - url, - headers={hdrs.AUTHORIZATION: "Bearer " + token, "taskid": taskid}, - raise_for_status=True, - timeout=ClientTimeout(30, connect=10), - connector=connector, - connector_owner=connector is None, - ) - - @classmethod - def from_conf(cls, server_name, conf, *args, **kwargs): - return cls( - server_name, - conf["url"], - conf["token"], - *args, - **kwargs, - ) - - async def fetch_status( - self, filter: str = "" - ) -> List["BackyDaemon.StatusDict"]: - async with self.session.get( - "/v1/status", params={"filter": filter} - ) as response: - jobs = await response.json() - for job in jobs: - if job["last_time"]: - job["last_time"] = datetime.datetime.fromisoformat( - job["last_time"] - ) - if job["next_time"]: - job["next_time"] = datetime.datetime.fromisoformat( - job["next_time"] - ) - return jobs - - async def reload_daemon(self): - async with self.session.post(f"/v1/reload") as response: - return - - async def get_jobs(self) -> List[dict]: - async with self.session.get("/v1/jobs") as response: - return await response.json() - - async def run_job(self, name: str): - async with self.session.post(f"/v1/jobs/{name}/run") as response: - return - - async def list_backups(self) -> List[str]: - async with self.session.get("/v1/backups") as response: - return await response.json() - - async def run_purge(self, name: str): - async with self.session.post(f"/v1/backups/{name}/purge") as response: - return - - async def touch_backup(self, name: str): - async with self.session.post(f"/v1/backups/{name}/touch") as response: - return - - async def get_revs( - self, backup: "backy.backup.Backup", only_clean: bool = True - ) -> List[Revision]: - async with self.session.get( - f"/v1/backups/{backup.name}/revs", - params={"only_clean": int(only_clean)}, - ) as response: - json = await response.json() - revs = [Revision.from_dict(r, backup, self.log) for r in json] - for r in revs: - r.backend_type = "" - r.orig_tags = r.tags - r.server = self.server_name - return revs - - async def put_tags(self, rev: Revision, autoremove: bool = False): - async with self.session.put( - f"/v1/backups/{rev.backup.name}/revs/{rev.uuid}/tags", - json={"old_tags": list(rev.orig_tags), "new_tags": list(rev.tags)}, - params={"autoremove": int(autoremove)}, - ) as response: - return - - async def close(self): - await 
self.session.close() - - async def __aenter__(self) -> "APIClient": - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close() - - -class CLIClient: - api: APIClient - log: BoundLogger - - def __init__(self, apiclient, log): - self.api = apiclient - self.log = log.bind(subsystem="CLIClient") - - async def __aenter__(self) -> "CLIClient": - await self.api.__aenter__() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.api.__aexit__(exc_type, exc_val, exc_tb) - - async def jobs(self, filter_re=""): - """List status of all known jobs. Optionally filter by regex.""" - - tz = format_datetime_local(None)[1] - - t = Table( - "Job", - "SLA", - "SLA overdue", - "Status", - f"Last Backup ({tz})", - "Last Tags", - Column("Last Duration", justify="right"), - f"Next Backup ({tz})", - "Next Tags", - ) - - jobs = await self.api.fetch_status(filter_re) - jobs.sort(key=lambda j: j["job"]) - for job in jobs: - overdue = ( - humanize.naturaldelta(job["sla_overdue"]) - if job["sla_overdue"] - else "-" - ) - last_duration = ( - humanize.naturaldelta(job["last_duration"]) - if job["last_duration"] - else "-" - ) - last_time = format_datetime_local(job["last_time"])[0] - next_time = format_datetime_local(job["next_time"])[0] - - t.add_row( - job["job"], - job["sla"], - overdue, - job["status"], - last_time, - job["last_tags"], - last_duration, - next_time, - job["next_tags"], - ) - backups = await self.api.list_backups() - if filter_re: - backups = list(filter(re.compile(filter_re).search, backups)) - for b in backups: - t.add_row( - b, - "-", - "-", - "Dead", - "-", - "", - "-", - "-", - "", - ) - - rprint(t) - print("{} jobs shown".format(len(jobs) + len(backups))) - - async def status(self): - """Show job status overview""" - t = Table("Status", "#") - state_summary: Dict[str, int] = {} - jobs = await self.api.get_jobs() - jobs += [{"status": "Dead"} for _ in await self.api.list_backups()] - for job in jobs: - state_summary.setdefault(job["status"], 0) - state_summary[job["status"]] += 1 - - for state in sorted(state_summary): - t.add_row(state, str(state_summary[state])) - rprint(t) - - async def run(self, job: str): - """Trigger immediate run for one job""" - try: - await self.api.run_job(job) - except ClientResponseError as e: - if e.status == HTTPNotFound.status_code: - self.log.error("unknown-job", job=job) - sys.exit(1) - raise - self.log.info("triggered-run", job=job) - - async def runall(self): - """Trigger immediate run for all jobs""" - jobs = await self.api.get_jobs() - for job in jobs: - await self.run(job["name"]) - - async def reload(self): - """Reload the configuration.""" - self.log.info("reloading-daemon") - await self.api.reload_daemon() - self.log.info("reloaded-daemon") - - async def check(self): - status = await self.api.fetch_status() - - exitcode = 0 - - for job in status: - log = self.log.bind(job_name=job["job"]) - if job["manual_tags"]: - log.info( - "check-manual-tags", - manual_tags=job["manual_tags"], - ) - if job["unsynced_revs"]: - self.log.info( - "check-unsynced-revs", unsynced_revs=job["unsynced_revs"] - ) - if job["sla"] != "OK": - log.critical( - "check-sla-violation", - last_time=str(job["last_time"]), - sla_overdue=job["sla_overdue"], - ) - exitcode = max(exitcode, 2) - if job["quarantine_reports"]: - log.warning( - "check-quarantined", reports=job["quarantine_reports"] - ) - exitcode = max(exitcode, 1) - - self.log.info("check-exit", exitcode=exitcode, jobs=len(status)) - raise SystemExit(exitcode) diff 
--git a/src/backy/conftest.py b/src/backy/conftest.py index 980c9448..5f9b9026 100644 --- a/src/backy/conftest.py +++ b/src/backy/conftest.py @@ -1,21 +1,37 @@ import datetime -import json import os import random -import shutil +import sys from unittest import mock from zoneinfo import ZoneInfo import pytest import structlog +import tzlocal -import backy.backup import backy.logging -import backy.main import backy.schedule +import backy.source from backy import utils +from backy.file import FileSource +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule -fixtures = os.path.dirname(__file__) + "/tests/samples" + + +def create_rev(repository, tags) -> Revision: + r = Revision.create(repository, tags, repository.log) + r.materialize() + repository.scan() + return repository.find_by_uuid(r.uuid) + + +@pytest.fixture +def tz_berlin(monkeypatch): + """Fix time zone to gain independence from runtime environment.""" + monkeypatch.setattr( + tzlocal, "get_localzone", lambda: ZoneInfo("Europe/Berlin") + ) @pytest.fixture(autouse=True, scope="session") @@ -26,14 +42,6 @@ def fix_pytest_coverage_465(): ) -@pytest.fixture -def simple_file_config(tmp_path, monkeypatch, log): - shutil.copy(fixtures + "/simple_file/config", str(tmp_path)) - monkeypatch.chdir(tmp_path) - b = backy.backup.Backup(tmp_path, log) - return b - - def pytest_assertrepr_compare(op, left, right): if left.__class__.__name__ != "Ellipsis": return @@ -42,15 +50,6 @@ def pytest_assertrepr_compare(op, left, right): return report.diff -@pytest.fixture(autouse=True) -def log(monkeypatch): - def noop_init_logging(*args, **kwargs): - pass - - monkeypatch.setattr(backy.logging, "init_logging", noop_init_logging) - return structlog.stdlib.get_logger() - - @pytest.fixture(autouse=True) def fix_cwd(): cwd = os.getcwd() @@ -79,26 +78,25 @@ def seed_random(monkeypatch): @pytest.fixture def schedule(): - schedule = backy.schedule.Schedule() + schedule = Schedule() schedule.configure({"daily": {"interval": "1d", "keep": 5}}) return schedule -@pytest.fixture(params=["chunked", "cowfile"]) -def backup(request, schedule, tmp_path, log): - with open(str(tmp_path / "config"), "w", encoding="utf-8") as f: - json.dump( - { - "source": { - "type": "file", - "filename": "test", - "backend": request.param, - }, - "schedule": schedule.to_dict(), - }, - f, - ) - return backy.backup.Backup(tmp_path, log) +@pytest.fixture +def repository(tmp_path, schedule, log): + repo = Repository(tmp_path, schedule, log) + repo.connect() + return repo + + +@pytest.fixture(autouse=True) +def log(monkeypatch): + def noop_init_logging(*args, **kwargs): + pass + + monkeypatch.setattr(backy.logging, "init_logging", noop_init_logging) + return structlog.stdlib.get_logger() @pytest.fixture(scope="session") @@ -116,3 +114,24 @@ def msg(self, message: str): @pytest.fixture(autouse=True) def reset_structlog(setup_structlog): utils.log_data = "" + + +@pytest.fixture(autouse=True) +def no_subcommand(monkeypatch): + def sync_invoke(self, *args): + return FileSource.main(*args) + + async def async_invoke(self, *args): + return FileSource.main(*args) + + monkeypatch.setattr(backy.source.CmdLineSource, "invoke", sync_invoke) + monkeypatch.setattr(backy.source.AsyncCmdLineSource, "invoke", async_invoke) + + +@pytest.fixture +def argv(): + original = sys.argv + new = original[:1] + sys.argv = new + yield new + sys.argv = original diff --git a/src/backy/daemon.py b/src/backy/daemon/__init__.py similarity index 83% rename from 
src/backy/daemon.py rename to src/backy/daemon/__init__.py index 03eb1764..34ba8799 100644 --- a/src/backy/daemon.py +++ b/src/backy/daemon/__init__.py @@ -1,5 +1,7 @@ +# -*- encoding: utf-8 -*- + +import argparse import asyncio -import datetime import fcntl import os import os.path as p @@ -7,19 +9,22 @@ import sys import time from pathlib import Path -from typing import IO, List, Optional, Pattern, TypedDict +from typing import IO, List, Optional, Pattern import aiofiles.os as aos import aioshutil +import structlog import yaml from structlog.stdlib import BoundLogger +from backy import logging +from backy.repository import Repository, StatusDict +from backy.revision import filter_manual_tags +from backy.schedule import Schedule +from backy.utils import has_recent_changes, is_dir_no_symlink + from .api import BackyAPI -from .backup import Backup -from .revision import filter_manual_tags -from .schedule import Schedule from .scheduler import Job -from .utils import has_recent_changes, is_dir_no_symlink daemon: "BackyDaemon" @@ -38,7 +43,7 @@ class BackyDaemon(object): config: dict schedules: dict[str, Schedule] jobs: dict[str, Job] - dead_backups: dict[str, Backup] + dead_repositories: dict[str, Repository] backup_semaphores: dict[str, asyncio.BoundedSemaphore] log: BoundLogger @@ -54,7 +59,7 @@ def __init__(self, config_file: Path, log: BoundLogger): self.schedules = {} self.backup_semaphores = {} self.jobs = {} - self.dead_backups = {} + self.dead_repositories = {} self._lock = None self.reload_api = asyncio.Event() self.api_addrs = ["::1", "127.0.0.1"] @@ -132,13 +137,14 @@ def _apply_config(self): del self.jobs[name] self.log.info("deleted-job", job_name=name) - self.dead_backups.clear() + self.dead_repositories.clear() for b in os.scandir(self.base_dir): if b.name in self.jobs or not b.is_dir(follow_symlinks=False): continue try: - self.dead_backups[b.name] = Backup( + self.dead_repositories[b.name] = Repository( self.base_dir / b.name, + Schedule(), self.log.bind(job_name=b.name), ) self.log.info("found-backup", job_name=b.name) @@ -276,33 +282,67 @@ async def shutdown_loop(self): self.log.info("stopping-loop") self.loop.stop() - class StatusDict(TypedDict): - job: str - sla: str - sla_overdue: int - status: str - last_time: Optional[datetime.datetime] - last_tags: Optional[str] - last_duration: Optional[float] - next_time: Optional[datetime.datetime] - next_tags: Optional[str] - manual_tags: str - quarantine_reports: int - unsynced_revs: int - local_revs: int + async def purge_old_files(self): + # This is a safety belt so we do not accidentally NOT delete old backups + # of deleted VMs. + # XXX This should likely be implemented as a check to indicate that + # we missed a deletion marker and should delete something and not + # silently delete it. + while True: + try: + self.log.info("purge-scanning") + for candidate in await aos.scandir(self.base_dir): + if not await is_dir_no_symlink(candidate.path): + continue + self.log.debug("purge-candidate", candidate=candidate.path) + reference_time = time.time() - 3 * 31 * 24 * 60 * 60 + if not await has_recent_changes( + candidate.path, reference_time + ): + self.log.info("purging", candidate=candidate.path) + await aioshutil.rmtree(candidate) + self.log.info("purge-finished") + except Exception: + self.log.exception("purge") + await asyncio.sleep(24 * 60 * 60) + + async def purge_pending_backups(self): + # XXX This isn't to purge "pending backups" but this means + # "process pending purges" ... 
+ while True: + try: + self.log.info("purge-pending-scanning") + for candidate in await aos.scandir(self.base_dir): + if ( + candidate.name in self.jobs # will get purged anyway + or not await is_dir_no_symlink(candidate.path) + or not await aos.path.exists( + p.join(candidate.path, ".purge_pending") + ) + ): + continue + self.log.info("purging-pending", job=candidate.name) + await Job(self, candidate.name, self.log).run_gc() + self.log.info("purge-pending-finished") + except Exception: + self.log.exception("purge-pending") + await asyncio.sleep(24 * 60 * 60) + # XXX this is duplicated in the client def status( self, filter_re: Optional[Pattern[str]] = None ) -> List[StatusDict]: """Collects status information for all jobs.""" - result: List["BackyDaemon.StatusDict"] = [] + # XXX with a database backend, we can evaluate this in live actually + # so this should move to the CLI client + result: List[StatusDict] = [] for job in list(self.jobs.values()): if filter_re and not filter_re.search(job.name): continue - job.backup.scan() + job.repository.scan() manual_tags = set() unsynced_revs = 0 - history = job.backup.clean_history + history = job.repository.clean_history for rev in history: manual_tags |= filter_manual_tags(rev.tags) if rev.pending_changes: @@ -310,8 +350,8 @@ def status( result.append( dict( job=job.name, - sla="OK" if job.sla else "TOO OLD", - sla_overdue=job.sla_overdue, + sla="OK" if job.repository.sla else "TOO OLD", + sla_overdue=job.repository.sla_overdue, status=job.status, last_time=history[-1].timestamp if history else None, last_tags=( @@ -331,60 +371,52 @@ def status( else None ), manual_tags=", ".join(manual_tags), - quarantine_reports=len(job.backup.quarantine.report_ids), + problem_reports=len(job.repository.report_ids), unsynced_revs=unsynced_revs, local_revs=len( - job.backup.get_history(clean=True, local=True) + job.repository.get_history(clean=True, local=True) ), ) ) return result - async def purge_old_files(self): - while True: - try: - self.log.info("purge-scanning") - for candidate in await aos.scandir(self.base_dir): - if not await is_dir_no_symlink(candidate.path): - continue - self.log.debug("purge-candidate", candidate=candidate.path) - reference_time = time.time() - 3 * 31 * 24 * 60 * 60 - if not await has_recent_changes( - candidate.path, reference_time - ): - self.log.info("purging", candidate=candidate.path) - await aioshutil.rmtree(candidate) - self.log.info("purge-finished") - except Exception: - self.log.exception("purge") - await asyncio.sleep(24 * 60 * 60) - - async def purge_pending_backups(self): - while True: - try: - self.log.info("purge-pending-scanning") - for candidate in await aos.scandir(self.base_dir): - if ( - candidate.name in self.jobs # will get purged anyway - or not await is_dir_no_symlink(candidate.path) - or not await aos.path.exists( - p.join(candidate.path, ".purge_pending") - ) - ): - continue - self.log.info("purging-pending", job=candidate.name) - await Job(self, candidate.name, self.log).run_purge() - self.log.info("purge-pending-finished") - except Exception: - self.log.exception("purge-pending") - await asyncio.sleep(24 * 60 * 60) +def main(): + parser = argparse.ArgumentParser( + description="Backy daemon - runs the scheduler and API.", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose output" + ) + parser.add_argument( + "-l", + "--logfile", + default=Path("/var/log/backy.log"), + type=Path, + help=( + "file name to write log output in. 
" + "(default: /var/log/backy.log for `scheduler`, " + "ignored for `client`, $backupdir/backy.log otherwise)" + ), + ) + parser.add_argument( + "-c", + "--config", + type=Path, + default="/etc/backy.conf", + help="(default: %(default)s)", + ) + args = parser.parse_args() + + # Logging + logging.init_logging(args.verbose, args.logfile) + log = structlog.stdlib.get_logger(subsystem="command") + log.debug("invoked", args=" ".join(sys.argv)) -def main(config_file: Path, log: BoundLogger): # pragma: no cover global daemon loop = asyncio.get_event_loop() - daemon = BackyDaemon(config_file, log) + daemon = BackyDaemon(args.config, log) daemon.start(loop) daemon.api_server() daemon.run_forever() diff --git a/src/backy/api.py b/src/backy/daemon/api.py similarity index 53% rename from src/backy/api.py rename to src/backy/daemon/api.py index d59ee6ff..f20afcde 100644 --- a/src/backy/api.py +++ b/src/backy/daemon/api.py @@ -1,10 +1,10 @@ import datetime import re -from json import JSONEncoder -from pathlib import Path -from typing import TYPE_CHECKING, Any, List, Tuple +from asyncio import get_running_loop +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple -from aiohttp import hdrs, web +import aiohttp +from aiohttp import ClientTimeout, TCPConnector, hdrs, web from aiohttp.web_exceptions import ( HTTPAccepted, HTTPBadRequest, @@ -18,23 +18,21 @@ from aiohttp.web_runner import AppRunner, TCPSite from structlog.stdlib import BoundLogger -from backy.backup import Backup +import backy.repository +from backy.repository import Repository, StatusDict from backy.revision import Revision -from backy.scheduler import Job -from backy.utils import generate_taskid +from backy.utils import BackyJSONEncoder, generate_taskid if TYPE_CHECKING: from backy.daemon import BackyDaemon + from .scheduler import Job -class BackyJSONEncoder(JSONEncoder): - def default(self, o: Any) -> Any: - if hasattr(o, "to_dict"): - return o.to_dict() - elif isinstance(o, datetime.datetime): - return o.isoformat() - else: - super().default(o) + +def to_json(response: Any) -> aiohttp.web.StreamResponse: + if response is None: + raise web.HTTPNoContent() + return web.json_response(response, dumps=BackyJSONEncoder().encode) class BackyAPI: @@ -49,7 +47,7 @@ def __init__(self, daemon, log): self.daemon = daemon self.sites = {} self.app = web.Application( - middlewares=[self.log_conn, self.require_auth, self.to_json] + middlewares=[self.log_conn, self.require_auth] ) self.app.add_routes( [ @@ -136,35 +134,29 @@ async def require_auth(self, request: web.Request, handler): request["log"] = request["log"].bind(job_name="~" + client) return await handler(request) - @middleware - async def to_json(self, request: web.Request, handler): - resp = await handler(request) - if isinstance(resp, web.Response): - return resp - elif resp is None: - raise web.HTTPNoContent() - else: - return web.json_response(resp, dumps=BackyJSONEncoder().encode) - async def get_status( self, request: web.Request - ) -> List["BackyDaemon.StatusDict"]: + ) -> aiohttp.web.StreamResponse: filter = request.query.get("filter", None) request["log"].info("get-status", filter=filter) - if filter: - filter = re.compile(filter) - return self.daemon.status(filter) + filter_re = re.compile(filter) if filter else None + return to_json(self.daemon.status(filter_re)) async def reload_daemon(self, request: web.Request): request["log"].info("reload-daemon") self.daemon.reload() + return to_json(None) - async def get_jobs(self, request: web.Request) -> List[Job]: + async def 
get_jobs(self, request: web.Request): request["log"].info("get-jobs") - return list(self.daemon.jobs.values()) + return to_json(list(self.daemon.jobs.values())) - async def get_job(self, request: web.Request) -> Job: + async def get_job(self, request: web.Request) -> "Job": name = request.match_info.get("job_name") + if name is None: + request["log"].info("empty-job") + raise HTTPNotFound() + request["log"].info("get-job", name=name) try: return self.daemon.jobs[name] @@ -178,20 +170,20 @@ async def run_job(self, request: web.Request): j.run_immediately.set() raise HTTPAccepted() - async def list_backups(self, request: web.Request) -> List[str]: + async def list_backups(self, request: web.Request): request["log"].info("list-backups") - return list(self.daemon.dead_backups.keys()) + return to_json(list(self.daemon.dead_repositories.keys())) async def get_backup( self, request: web.Request, allow_active: bool - ) -> Backup: + ) -> Repository: name = request.match_info.get("backup_name") request["log"].info("get-backups", name=name) - if name in self.daemon.dead_backups: - return self.daemon.dead_backups[name] + if name in self.daemon.dead_repositories: + return self.daemon.dead_repositories[name] if name in self.daemon.jobs: if allow_active: - return self.daemon.jobs[name].backup + return self.daemon.jobs[name].repository request["log"].info("get-backups-forbidden", name=name) raise HTTPForbidden() request["log"].info("get-backups-not-found", name=name) @@ -207,13 +199,16 @@ async def touch_backup(self, request: web.Request): backup = await self.get_backup(request, True) request["log"].info("touch-backup", name=backup.name) backup.touch() + raise web.HTTPNoContent() - async def get_revs(self, request: web.Request) -> List[Revision]: + async def get_revs(self, request: web.Request): backup = await self.get_backup(request, True) request["log"].info("get-revs", name=backup.name) backup.scan() - return backup.get_history( - local=True, clean=request.query.get("only_clean", "") == "1" + return to_json( + backup.get_history( + local=True, clean=request.query.get("only_clean", "") == "1" + ) ) async def put_tags(self, request: web.Request): @@ -252,3 +247,148 @@ async def put_tags(self, request: web.Request): except BlockingIOError: request["log"].info("put-tags-locked") raise HTTPServiceUnavailable() + raise web.HTTPNoContent() + + +class ClientManager: + connector: TCPConnector + peers: dict[str, dict] + clients: dict[str, "Client"] + taskid: str + log: BoundLogger + + def __init__(self, peers: Dict[str, dict], taskid: str, log: BoundLogger): + self.connector = TCPConnector() + self.peers = peers + self.clients = dict() + self.taskid = taskid + self.log = log.bind(subsystem="ClientManager") + + def __getitem__(self, name: str) -> "Client": + if name and name not in self.clients: + self.clients[name] = Client.from_conf( + name, self.peers[name], self.taskid, self.log, self.connector + ) + return self.clients[name] + + def __iter__(self) -> Iterator[str]: + return iter(self.peers) + + async def close(self) -> None: + for c in self.clients.values(): + await c.close() + await self.connector.close() + + async def __aenter__(self) -> "ClientManager": + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + + +class Client: + log: BoundLogger + server_name: str + session: aiohttp.ClientSession + + def __init__( + self, + server_name: str, + url: str, + token: str, + taskid: str, + log, + connector=None, + ): + assert get_running_loop().is_running() + self.log = 
log.bind(subsystem="APIClient") + self.server_name = server_name + self.session = aiohttp.ClientSession( + url, + headers={hdrs.AUTHORIZATION: "Bearer " + token, "taskid": taskid}, + raise_for_status=True, + timeout=ClientTimeout(30, connect=10), + connector=connector, + connector_owner=connector is None, + ) + + @classmethod + def from_conf(cls, server_name, conf, *args, **kwargs): + return cls( + server_name, + conf["url"], + conf["token"], + *args, + **kwargs, + ) + + async def fetch_status(self, filter: str = "") -> List[StatusDict]: + async with self.session.get( + "/v1/status", params={"filter": filter} + ) as response: + jobs = await response.json() + for job in jobs: + if job["last_time"]: + job["last_time"] = datetime.datetime.fromisoformat( + job["last_time"] + ) + if job["next_time"]: + job["next_time"] = datetime.datetime.fromisoformat( + job["next_time"] + ) + return jobs + + async def reload_daemon(self): + async with self.session.post("/v1/reload"): + return + + async def get_jobs(self) -> List[dict]: + async with self.session.get("/v1/jobs") as response: + return await response.json() + + async def run_job(self, name: str): + async with self.session.post(f"/v1/jobs/{name}/run"): + return + + async def list_backups(self) -> List[str]: + async with self.session.get("/v1/backups") as response: + return await response.json() + + async def run_purge(self, name: str): + async with self.session.post(f"/v1/backups/{name}/purge"): + return + + async def touch_backup(self, name: str): + async with self.session.post(f"/v1/backups/{name}/touch"): + return + + async def get_revs( + self, repository: "backy.repository.Repository", only_clean: bool = True + ) -> List[Revision]: + async with self.session.get( + f"/v1/backups/{repository.name}/revs", + params={"only_clean": int(only_clean)}, + ) as response: + json = await response.json() + revs = [Revision.from_dict(r, repository, self.log) for r in json] + for r in revs: + r.orig_tags = r.tags + r.server = self.server_name + return revs + + async def put_tags(self, rev: Revision, autoremove: bool = False): + async with self.session.put( + f"/v1/backups/{rev.repository.name}/revs/{rev.uuid}/tags", + json={"old_tags": list(rev.orig_tags), "new_tags": list(rev.tags)}, + params={"autoremove": int(autoremove)}, + ): + return + + async def close(self): + await self.session.close() + + async def __aenter__(self) -> "Client": + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() diff --git a/src/backy/scheduler.py b/src/backy/daemon/scheduler.py similarity index 54% rename from src/backy/scheduler.py rename to src/backy/daemon/scheduler.py index 7c879074..9fea1d6a 100644 --- a/src/backy/scheduler.py +++ b/src/backy/daemon/scheduler.py @@ -1,44 +1,43 @@ import asyncio import datetime -import filecmp import hashlib -import os import random import subprocess +from collections import defaultdict from datetime import timedelta from pathlib import Path -from typing import TYPE_CHECKING, Literal, Optional, Set +from typing import TYPE_CHECKING, List, Literal, Optional, Set import yaml -from aiohttp import ClientError +from aiohttp import ClientConnectionError, ClientError, ClientResponseError +from aiohttp.web_exceptions import HTTPForbidden, HTTPNotFound from structlog.stdlib import BoundLogger import backy.utils +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule +from backy.utils import format_datetime_local, generate_taskid, time_or_event -from 
.backup import Backup -from .client import APIClientManager -from .ext_deps import BACKY_CMD -from .schedule import Schedule -from .utils import ( - SafeFile, - format_datetime_local, - generate_taskid, - time_or_event, -) +from ..source import AsyncCmdLineSource +from .api import Client, ClientManager if TYPE_CHECKING: from backy.daemon import BackyDaemon + from backy.repository import StatusDict + + +def locked(target: str, mode: Literal["shared", "exclusive"]): + return Repository.locked(target, mode, repo_attr="repository") class Job(object): name: str - source: dict - schedule_name: str + source: AsyncCmdLineSource status: str = "" next_time: Optional[datetime.datetime] = None next_tags: Optional[set[str]] = None path: Path - backup: Backup logfile: Path last_config: Optional[dict] = None daemon: "BackyDaemon" @@ -59,10 +58,12 @@ def __init__(self, daemon: "BackyDaemon", name: str, log: BoundLogger): self.logfile = self.path / "backy.log" def configure(self, config: dict) -> None: - self.source = config["source"] - self.schedule_name = config["schedule"] - self.update_config() - self.backup = Backup(self.path, self.log) + repository = Repository( + self.path, self.daemon.schedules[config["schedule"]], self.log + ) + repository.connect() + self.source = AsyncCmdLineSource(repository, config["source"], self.log) + self.source.store() self.last_config = config @property @@ -75,58 +76,23 @@ def spread(self) -> int: return generator.randint(0, limit) @property - def sla(self) -> bool: - """Is the SLA currently held? - - The SLA being held is only reflecting the current status. - - It does not help to reflect on past situations that have failed as - those are not indicators whether and admin needs to do something - right now. - """ - return not self.sla_overdue - - @property - def sla_overdue(self) -> int: - """Amount of time the SLA is currently overdue.""" - if not self.backup.clean_history: - return 0 - age = backy.utils.now() - self.backup.clean_history[-1].timestamp - max_age = min(x["interval"] for x in self.schedule.schedule.values()) - if age > max_age * 1.5: - return age.total_seconds() - return 0 + def schedule(self) -> Schedule: + return self.repository.schedule @property - def schedule(self) -> Schedule: - return self.daemon.schedules[self.schedule_name] + def repository(self) -> Repository: + return self.source.repository def update_status(self, status: str) -> None: self.status = status self.log.debug("updating-status", status=self.status) - def update_config(self) -> None: - """Writes config file for 'backy backup' subprocess.""" - - # We do not want to create leading directories, only - # the backup directory itself. If the base directory - # does not exist then we likely don't have a correctly - # configured environment. 
- self.path.mkdir(exist_ok=True) - config = self.path / "config" - with SafeFile(config, encoding="utf-8") as f: - f.open_new("wb") - yaml.safe_dump( - {"source": self.source, "schedule": self.schedule.config}, f - ) - if config.exists() and filecmp.cmp(config, f.name): - raise ValueError("not changed") - def to_dict(self) -> dict: return { "name": self.name, + "path": self.path, "status": self.status, - "source": self.source, + # "source": self.source, "schedule": self.schedule.to_dict(), } @@ -141,20 +107,24 @@ async def _wait_for_deadline(self) -> Optional[Literal[True]]: async def _wait_for_leader(self, next_time: datetime.datetime) -> bool: api = None try: - api = APIClientManager(self.daemon.peers, self.taskid, self.log) + api = ClientManager(self.daemon.peers, self.taskid, self.log) statuses = await asyncio.gather( *[api[server].fetch_status(f"^{self.name}$") for server in api], return_exceptions=True, ) leader = None - leader_revs = len(self.backup.get_history(clean=True, local=True)) - leader_status: "BackyDaemon.StatusDict" + leader_revs = len( + self.repository.get_history(clean=True, local=True) + ) + leader_status: "StatusDict" self.log.info("local-revs", local_revs=leader_revs) for server, status in zip(api, statuses): log = self.log.bind(server=server) if isinstance(status, BaseException): log.info( - "server-unavailable", exc_info=status, exc_style="short" + "server-unavailable", + exc_info=status, + exc_style="short", ) continue num_remote_revs = status[0]["local_revs"] @@ -226,12 +196,19 @@ async def run_forever(self) -> None: self.log.debug("loop-started") while True: self.taskid = generate_taskid() + # TODO: use contextvars self.log = self.log.bind(job_name=self.name, sub_taskid=self.taskid) - self.backup = Backup(self.path, self.log) + self.source.log = self.source.log.bind( + job_name=self.name, sub_taskid=self.taskid + ) + self.repository.log = self.repository.log.bind( + job_name=self.name, sub_taskid=self.taskid + ) + self.repository.connect() next_time, next_tags = self.schedule.next( - backy.utils.now(), self.spread, self.backup + backy.utils.now(), self.spread, self.repository ) if self.errors: @@ -268,8 +245,8 @@ async def run_forever(self) -> None: else: speed = "slow" if ( - self.backup.clean_history - and self.backup.clean_history[-1].stats["duration"] + self.repository.clean_history + and self.repository.clean_history[-1].stats["duration"] < 600 ): speed = "fast" @@ -278,11 +255,14 @@ async def run_forever(self) -> None: async with self.daemon.backup_semaphores[speed]: self.update_status(f"running ({speed})") + self.repository._clean() await self.run_backup(next_tags) + self.repository.scan() + self.repository._clean() await self.pull_metadata() await self.run_expiry() await self.push_metadata() - await self.run_purge() + await self.run_gc() await self.run_callback() except asyncio.CancelledError: raise @@ -305,178 +285,23 @@ async def run_forever(self) -> None: self.backoff = 0 self.update_status("finished") - async def pull_metadata(self) -> None: - self.log.info("pull-metadata-started") - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - self.path, - "-l", - self.logfile, - "pull", - "-c", - self.daemon.config_file, - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "pull-metadata-finished", - return_code=return_code, - 
subprocess_pid=proc.pid, - ) - except asyncio.CancelledError: - self.log.warning("pull-metadata-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise - - async def push_metadata(self) -> None: - self.log.info("push-metadata-started") - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - self.path, - "-l", - self.logfile, - "push", - "-c", - self.daemon.config_file, - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "push-metadata-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - except asyncio.CancelledError: - self.log.warning("push-metadata-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise - async def run_backup(self, tags: Set[str]) -> None: self.log.info("backup-started", tags=", ".join(tags)) - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - str(self.path), - "-l", - str(self.logfile), - "backup", - ",".join(tags), - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "backup-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - if return_code: - raise RuntimeError( - f"Backup failed with return code {return_code}" - ) - except asyncio.CancelledError: - self.log.warning("backup-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise + + r = Revision.create(self.repository, tags, self.log) + r.materialize() + return_code = await self.source.backup(r) + if return_code: + raise RuntimeError(f"Backup failed with return code {return_code}") async def run_expiry(self) -> None: self.log.info("expiry-started") - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - self.path, - "-l", - self.logfile, - "expire", - close_fds=True, - start_new_session=True, # Avoid signal propagation like Ctrl-C - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "expiry-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - if return_code: - raise RuntimeError( - f"Expiry failed with return code {return_code}" - ) - except asyncio.CancelledError: - self.log.warning("expiry-cancelled") - try: - proc.terminate() - except ProcessLookupError: - pass - raise + # includes lock and repository.scan() + self.repository.expire() - async def run_purge(self) -> None: - self.log.info("purge-started") - proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-t", - self.taskid, - "-b", - str(self.path), - "-l", - str(self.logfile), - "purge", - # start_new_session=True, # Avoid signal propagation like Ctrl-C. 
- # close_fds=True, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - return_code = await proc.wait() - self.log.info( - "purge-finished", - return_code=return_code, - subprocess_pid=proc.pid, - ) - except asyncio.CancelledError: - self.log.warning("purge-cancelled", subprocess_pid=proc.pid) - try: - proc.terminate() - except ProcessLookupError: - pass - raise + async def run_gc(self) -> None: + self.log.info("gc-started") + await self.source.gc() async def run_callback(self) -> None: if not self.daemon.backup_completed_callback: @@ -484,50 +309,31 @@ async def run_callback(self) -> None: return self.log.info("callback-started") - read, write = os.pipe() - backy_proc = await asyncio.create_subprocess_exec( - BACKY_CMD, - "-b", - str(self.path), - "-l", - str(self.logfile), - "status", - "--yaml", - stdin=subprocess.DEVNULL, - stdout=write, - stderr=subprocess.DEVNULL, - ) - os.close(write) + status = yaml.safe_dump( + [r.to_dict() for r in self.repository.history] + ).encode("utf-8") + callback_proc = await asyncio.create_subprocess_exec( str(self.daemon.backup_completed_callback), self.name, - stdin=read, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - os.close(read) try: - stdout, stderr = await callback_proc.communicate() - return_code1 = await backy_proc.wait() + stdout, stderr = await callback_proc.communicate(status) self.log.info( "callback-finished", - return_code1=return_code1, - return_code2=callback_proc.returncode, - subprocess_pid1=backy_proc.pid, - subprocess_pid2=callback_proc.pid, + return_code=callback_proc.returncode, + subprocess_pid=callback_proc.pid, stdout=stdout.decode() if stdout else None, stderr=stderr.decode() if stderr else None, ) except asyncio.CancelledError: self.log.warning( "callback-cancelled", - subprocess_pid1=backy_proc.pid, - subprocess_pid2=callback_proc.pid, + subprocess_pid=callback_proc.pid, ) - try: - backy_proc.terminate() - except ProcessLookupError: - pass try: callback_proc.terminate() except ProcessLookupError: @@ -549,3 +355,146 @@ def stop(self) -> None: self._task.cancel() self._task = None self.update_status("") + + @locked(target=".backup", mode="exclusive") + async def push_metadata(self) -> int: + grouped = defaultdict(list) + for r in self.repository.clean_history: + if r.pending_changes: + grouped[r.server].append(r) + self.log.info( + "push-start", changes=sum(len(L) for L in grouped.values()) + ) + async with ClientManager( + self.daemon.peers, self.taskid, self.log + ) as apis: + errors = await asyncio.gather( + *[ + self._push_metadata_single(apis[server], grouped[server]) + for server in apis + ] + ) + self.log.info("push-end", errors=sum(errors)) + return sum(errors) + + async def _push_metadata_single( + self, api: Client, revs: List[Revision] + ) -> bool: + purge_required = False + error = False + for r in revs: + log = self.log.bind( + server=r.server, + rev_uuid=r.uuid, + ) + log.debug( + "push-updating-tags", + old_tags=r.orig_tags, + new_tags=r.tags, + ) + try: + await api.put_tags(r, autoremove=True) + if r.tags: + r.orig_tags = r.tags + r.write_info() + else: + r.remove(force=True) + purge_required = True + except ClientResponseError: + log.warning("push-client-error", exc_style="short") + error = True + except ClientConnectionError: + log.warning("push-connection-error", exc_style="short") + error = True + except ClientError: + log.exception("push-error") + error = True + + if purge_required: + log = self.log.bind(server=api.server_name) + 
log.debug("push-purging-remote") + try: + await api.run_purge(self.name) + except ClientResponseError: + log.warning("push-purge-client-error", exc_style="short") + error = True + except ClientConnectionError: + log.warning("push-purge-connection-error", exc_style="short") + error = True + except ClientError: + log.error("push-purge-error") + error = True + return error + + @locked(target=".backup", mode="exclusive") + async def pull_metadata(self) -> int: + async def remove_dead_peer(): + for r in list(self.repository.history): + if r.server and r.server not in self.daemon.peers: + self.log.info( + "pull-removing-dead-peer", + rev_uuid=r.uuid, + server=r.server, + ) + r.remove(force=True) + return False + + self.log.info("pull-start") + async with ClientManager( + self.daemon.peers, self.taskid, self.log + ) as apis: + errors = await asyncio.gather( + remove_dead_peer(), + *[self._pull_metadata_single(apis[server]) for server in apis], + ) + self.log.info("pull-end", errors=sum(errors)) + return sum(errors) + + async def _pull_metadata_single(self, api: Client) -> bool: + error = False + log = self.log.bind(server=api.server_name) + try: + await api.touch_backup(self.name) + remote_revs = await api.get_revs(self.repository) + log.debug("pull-found-revs", revs=len(remote_revs)) + except ClientResponseError as e: + if e.status in [ + HTTPNotFound.status_code, + HTTPForbidden.status_code, + ]: + log.debug("pull-not-found") + else: + log.warning("pull-client-error", exc_style="short") + error = True + remote_revs = [] + except ClientConnectionError: + log.warning("pull-connection-error", exc_style="short") + return True + except ClientError: + log.exception("pull-error") + error = True + remote_revs = [] + + local_uuids = { + r.uuid + for r in self.repository.history + if r.server == api.server_name + } + remote_uuids = {r.uuid for r in remote_revs} + for uuid in local_uuids - remote_uuids: + log.warning("pull-removing-unknown-rev", rev_uuid=uuid) + self.repository.find_by_uuid(uuid).remove(force=True) + + for r in remote_revs: + if r.uuid in local_uuids: + if ( + r.to_dict() + == self.repository.find_by_uuid(r.uuid).to_dict() + ): + continue + log.debug("pull-updating-rev", rev_uid=r.uuid) + else: + log.debug("pull-new-rev", rev_uid=r.uuid) + r.write_info() + + return error diff --git a/src/backy/daemon/tests/__init__.py b/src/backy/daemon/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/tests/test_api.py b/src/backy/daemon/tests/test_api.py similarity index 96% rename from src/backy/tests/test_api.py rename to src/backy/daemon/tests/test_api.py index 537bfc36..add76c82 100644 --- a/src/backy/tests/test_api.py +++ b/src/backy/daemon/tests/test_api.py @@ -140,7 +140,7 @@ async def test_remove_peer(daemons, log): ds = await daemons(2) j0 = ds[0].jobs["test01"] - b0 = j0.backup + b0 = j0.repository rev0 = create_rev(b0, log) assert [r.uuid for r in b0.history] == [rev0.uuid] @@ -159,11 +159,11 @@ async def test_remove_remote_backup(daemons, log): ds = await daemons(2) j0 = ds[0].jobs["test01"] - b0 = j0.backup + b0 = j0.repository rev0 = create_rev(b0, log) j1 = ds[1].jobs["test01"] - b1 = j1.backup + b1 = j1.repository rev1 = create_rev(b1, log) assert [r.uuid for r in b0.history] == [rev0.uuid] @@ -194,11 +194,11 @@ async def test_simple_sync(daemons, log): ds = await daemons(3) j0 = ds[0].jobs["test01"] - b0 = j0.backup + b0 = j0.repository rev0 = create_rev(b0, log) j1 = ds[1].jobs["test01"] - b1 = j1.backup + b1 = j1.repository rev1 = create_rev(b1, 
log) # ignore offline servers @@ -214,9 +214,8 @@ async def test_simple_sync(daemons, log): assert [r.uuid for r in b0.history] == [rev0.uuid, rev1.uuid] new_rev1 = b0.history[1] - assert new_rev1.backup == b0 + assert new_rev1.repository == b0 assert new_rev1.timestamp == rev1.timestamp - assert new_rev1.backend_type == "" assert new_rev1.stats == rev1.stats assert new_rev1.tags == rev1.tags assert new_rev1.orig_tags == new_rev1.tags @@ -227,16 +226,15 @@ async def test_simple_sync(daemons, log): rev1.distrust() rev1.tags = {"manual:new"} rev1.write_info() - rev1.backup.scan() + rev1.repository.scan() await j0.pull_metadata() b0.scan() assert [r.uuid for r in b0.history] == [rev0.uuid, rev1.uuid] new_rev1 = b0.history[1] - assert new_rev1.backup == b0 + assert new_rev1.repository == b0 assert new_rev1.timestamp == rev1.timestamp - assert new_rev1.backend_type == "" assert new_rev1.stats == rev1.stats assert new_rev1.tags == rev1.tags assert new_rev1.orig_tags == rev1.tags @@ -245,7 +243,7 @@ async def test_simple_sync(daemons, log): # mark rev for deletion new_rev1.remove() - new_rev1.backup.scan() + new_rev1.repository.scan() assert [r.uuid for r in b0.history] == [rev0.uuid, rev1.uuid] assert new_rev1.tags == set() assert new_rev1.orig_tags == rev1.tags @@ -281,7 +279,7 @@ async def test_split_brain(daemons, log): await modify_authtokens(ds, [0, 1], [2, 3], allow=False, bidirectional=True) js = [d.jobs["test01"] for d in ds] - bs = [j.backup for j in js] + bs = [j.repository for j in js] revs = [create_rev(b, log) for b in bs] for b, r in zip(bs, revs): @@ -289,7 +287,7 @@ async def test_split_brain(daemons, log): for j in js: await j.pull_metadata() - j.backup.scan() + j.repository.scan() del ds[0].config["jobs"]["test01"] ds[0]._apply_config() @@ -389,7 +387,7 @@ async def null_coroutine(*args, delay=0.1, **kw): await asyncio.sleep(delay) async def run_backup(job, tags, delta=datetime.timedelta()): - r = Revision.create(job.backup, tags, log) + r = Revision.create(job.repository, tags, log) r.timestamp = backy.utils.now() + delta r.stats["duration"] = 1 r.write_info() @@ -414,7 +412,7 @@ async def delay_or_event(delay, event): for job, start_delay in zip(jobs, start_delays): monkeypatch.setattr(job, "run_expiry", null_coroutine) - monkeypatch.setattr(job, "run_purge", null_coroutine) + monkeypatch.setattr(job, "run_gc", null_coroutine) monkeypatch.setattr(job, "run_callback", null_coroutine) monkeypatch.setattr(job, "run_backup", partial(run_backup, job)) monkeypatch.setattr(job, "pull_metadata", null_coroutine) @@ -484,7 +482,7 @@ async def test_wait_for_leader_parallel(jobs_dry_run): ... AAAA I test01[A4WN] job/leader-found [server-0] leader=None leader_revs=1 ... AAAA D test01[A4WN] job/updating-status [server-0] status='waiting for worker slot (fast)' ... AAAA D test01[A4WN] job/updating-status [server-0] status='running (fast)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... ... AAAA I test01[N6PW] job/leader-finished [server-1] leader='server-0' ... AAAA D test01[N6PW] job/updating-status [server-1] status='finished' @@ -535,7 +533,7 @@ async def test_wait_for_leader_delayed(jobs_dry_run): ... AAAA I test01[N6PW] job/leader-not-scheduled [server-1] leader='server-0' ... AAAA D test01[N6PW] job/updating-status [server-1] status='waiting for worker slot (slow)' ... AAAA D test01[N6PW] job/updating-status [server-1] status='running (slow)' -... 
AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[N6PW] job/updating-status [server-1] status='finished' ... """ @@ -599,12 +597,12 @@ async def crash(*args, **kw): ... AAAA I test01[A4WN] job/leader-found [server-0] leader=None leader_revs=1 ... AAAA D test01[A4WN] job/updating-status [server-0] status='waiting for worker slot (fast)' ... AAAA D test01[A4WN] job/updating-status [server-0] status='running (fast)' -... AAAA I daemon/api-reconfigure [server-0] \n\ +... AAAA I - daemon/api-reconfigure [server-0] \n\ ... ... AAAA W test01[N6PW] job/leader-failed [server-1] exception_class='aiohttp.client_exceptions.ClientResponseError' exception_msg="401, message='Unauthorized', url=URL('...')" leader='server-0' ... AAAA D test01[N6PW] job/updating-status [server-1] status='waiting for worker slot (slow)' ... AAAA D test01[N6PW] job/updating-status [server-1] status='running (slow)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[A4WN] job/updating-status [server-0] status='finished' ... ... AAAA D test01[N6PW] job/updating-status [server-1] status='finished' @@ -647,7 +645,7 @@ async def test_wait_for_leader_stopped(jobs_dry_run): ... AAAA I test01[A4WN] job/leader-stopped [server-1] leader='server-0' ... AAAA D test01[A4WN] job/updating-status [server-1] status='waiting for worker slot (slow)' ... AAAA D test01[A4WN] job/updating-status [server-1] status='running (slow)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[A4WN] job/updating-status [server-1] status='finished' ... """ @@ -704,7 +702,7 @@ async def noop(*args, **kw): ... AAAA I test01[A4WN] job/leader-found [server-0] leader=None leader_revs=0 ... AAAA D test01[A4WN] job/updating-status [server-0] status='waiting for worker slot (slow)' ... AAAA D test01[A4WN] job/updating-status [server-0] status='running (slow)' -... AAAA D revision/writing-info revision_uuid='...' tags='daily' +... AAAA D - revision/writing-info revision_uuid='...' tags='daily' ... AAAA D test01[A4WN] job/updating-status [server-0] status='finished' ... 
""" diff --git a/src/backy/tests/test_callback.sh b/src/backy/daemon/tests/test_callback.sh similarity index 100% rename from src/backy/tests/test_callback.sh rename to src/backy/daemon/tests/test_callback.sh diff --git a/src/backy/tests/test_daemon.py b/src/backy/daemon/tests/test_daemon.py similarity index 76% rename from src/backy/tests/test_daemon.py rename to src/backy/daemon/tests/test_daemon.py index d8829ff9..8f39e210 100644 --- a/src/backy/tests/test_daemon.py +++ b/src/backy/daemon/tests/test_daemon.py @@ -6,18 +6,63 @@ import signal from pathlib import Path from unittest import mock +from unittest.mock import Mock import pytest import yaml +import backy.daemon from backy import utils -from backy.backends.chunked import ChunkedFileBackend from backy.daemon import BackyDaemon +from backy.daemon.scheduler import Job +from backy.file import FileSource from backy.revision import Revision -from backy.scheduler import Job from backy.tests import Ellipsis +def test_display_help(capsys, argv): + argv.append("--help") + with pytest.raises(SystemExit) as exit: + backy.daemon.main() + assert exit.value.code == 0 + out, err = capsys.readouterr() + assert ( + Ellipsis( + """\ +usage: pytest [-h] [-v] [-l LOGFILE] [-c CONFIG] + +Backy daemon - runs the scheduler and API. + +options: +... +""" + ) + == out + ) + assert err == "" + + +async def test_main(tmp_path, argv, monkeypatch): + mock = Mock() + monkeypatch.setattr(backy.daemon.BackyDaemon, "start", mock) + monkeypatch.setattr(backy.daemon.BackyDaemon, "api_server", mock) + monkeypatch.setattr(backy.daemon.BackyDaemon, "run_forever", mock) + argv.extend( + ["-v", "-l", str(tmp_path / "log"), "-c", str(tmp_path / "conf")] + ) + utils.log_data = "" + + backy.daemon.main() + + assert mock.call_count == 3 + assert ( + Ellipsis( + "... D - command/invoked args='... -v -l ... -c ...\n" + ) + == utils.log_data + ) + + @pytest.fixture async def daemon(tmp_path, monkeypatch, log): daemon = BackyDaemon(tmp_path / "config", log) @@ -150,23 +195,22 @@ async def test_run_backup(daemon, log): job = daemon.jobs["test01"] await job.run_backup({"manual:asdf"}) - job.backup.scan() - assert len(job.backup.history) == 1 - revision = job.backup.history[0] + job.repository.scan() + assert len(job.repository.history) == 1 + revision = job.repository.history[0] assert revision.tags == {"manual:asdf"} - backend = ChunkedFileBackend(revision, log) - with backend.open("r") as f: + source = job.source.create_source(FileSource) + with source._path_for_revision(revision).open("rb") as f: assert f.read() == b"I am your father, Luke!" # Run again. This also covers the code path that works if # the target backup directory exists already. await job.run_backup({"manual:asdf"}) - job.backup.scan() - assert len(job.backup.history) == 2 - revision = job.backup.history[1] + job.repository.scan() + assert len(job.repository.history) == 2 + revision = job.repository.history[1] assert revision.tags == {"manual:asdf"} - backend = ChunkedFileBackend(revision, log) - with backend.open("r") as f: + with source._path_for_revision(revision).open("rb") as f: assert f.read() == b"I am your father, Luke!" @@ -174,6 +218,7 @@ async def test_run_callback(daemon, log): job = daemon.jobs["test01"] await job.run_backup({"manual:asdf"}) + job.repository.scan() await job.run_callback() with open("test01.callback_stdin", "r") as f: @@ -203,8 +248,8 @@ def test_sla_before_first_backup(daemon): # I agree that this gives us a blind spot in the beginning. I'll # think of something when this happens. 
Maybe keeping a log of errors # or so to notice that we tried previously. - assert len(job.backup.history) == 0 - assert job.sla is True + assert len(job.repository.history) == 0 + assert job.repository.sla is True def test_sla_over_time(daemon, clock, tmp_path, log): @@ -213,32 +258,32 @@ def test_sla_over_time(daemon, clock, tmp_path, log): # I agree that this gives us a blind spot in the beginning. I'll # think of something when this happens. Maybe keeping a log of errors # or so to notice that we tried previously. - revision = Revision.create(job.backup, set(), log) + revision = Revision.create(job.repository, set(), log) # We're on a 24h cycle. 6 hours old backup is fine. revision.timestamp = utils.now() - datetime.timedelta(hours=6) revision.stats["duration"] = 60.0 revision.materialize() - job.backup.scan() - assert len(job.backup.history) == 1 - assert job.sla is True + job.repository.scan() + assert len(job.repository.history) == 1 + assert job.repository.sla is True # 24 hours is also fine. revision.timestamp = utils.now() - datetime.timedelta(hours=24) revision.write_info() - job.backup.scan() - assert job.sla is True + job.repository.scan() + assert job.repository.sla is True # 32 hours is also fine. revision.timestamp = utils.now() - datetime.timedelta(hours=32) revision.write_info() - job.backup.scan() - assert job.sla is True + job.repository.scan() + assert job.repository.sla is True # 24*1.5 hours is the last time that is OK. revision.timestamp = utils.now() - datetime.timedelta(hours=24 * 1.5) revision.write_info() - job.backup.scan() - assert job.sla is True + job.repository.scan() + assert job.repository.sla is True # 1 second later we consider this not to be good any longer. revision.timestamp = ( @@ -247,27 +292,27 @@ def test_sla_over_time(daemon, clock, tmp_path, log): - datetime.timedelta(seconds=1) ) revision.write_info() - job.backup.scan() - assert job.sla is False + job.repository.scan() + assert job.repository.sla is False # a running backup does not influence this. 
job.update_status("running (slow)") - r = Revision.create(job.backup, {"daily"}, log) + r = Revision.create(job.repository, {"daily"}, log) r.write_info() - assert job.sla is False + assert job.repository.sla is False def test_incomplete_revs_dont_count_for_sla(daemon, clock, tmp_path, log): job = daemon.jobs["test01"] - r1 = Revision.create(job.backup, set(), log) + r1 = Revision.create(job.repository, set(), log) r1.timestamp = utils.now() - datetime.timedelta(hours=48) r1.stats["duration"] = 60.0 r1.materialize() - r2 = Revision.create(job.backup, set(), log) + r2 = Revision.create(job.repository, set(), log) r2.timestamp = utils.now() - datetime.timedelta(hours=1) r2.materialize() - job.backup.scan() - assert False is job.sla + job.repository.scan() + assert False is job.repository.sla def test_update_status(daemon, log): @@ -291,7 +336,7 @@ async def test_task_generator(daemon, clock, tmp_path, monkeypatch, tz_berlin): await cancel_and_wait(j) job = daemon.jobs["test01"] - async def null_coroutine(): + async def null_coroutine(*args, **kw): return monkeypatch.setattr(job, "_wait_for_deadline", null_coroutine) @@ -319,7 +364,7 @@ async def test_task_generator_backoff( await cancel_and_wait(j) job = daemon.jobs["test01"] - async def null_coroutine(): + async def null_coroutine(*args, **kw): await asyncio.sleep(0.1) async def false_coroutine(*args, **kw): @@ -339,7 +384,7 @@ async def failing_coroutine(*args, **kw): monkeypatch.setattr(job, "_wait_for_deadline", null_coroutine) monkeypatch.setattr(job, "run_expiry", null_coroutine) - monkeypatch.setattr(job, "run_purge", null_coroutine) + monkeypatch.setattr(job, "run_gc", null_coroutine) monkeypatch.setattr(job, "run_callback", null_coroutine) monkeypatch.setattr(job, "run_backup", failing_coroutine) monkeypatch.setattr(job, "pull_metadata", null_coroutine) @@ -367,40 +412,40 @@ async def wait_for_job_finished(): Ellipsis( """\ ... D test01[...] job/loop-started \n\ -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-02 07:32:51' ... E test01[...] job/exception exception_class='builtins.Exception' exception_msg='' exception>\tTraceback (most recent call last): -exception>\t File "/.../src/backy/scheduler.py", line ..., in run_forever +exception>\t File "/.../src/backy/daemon/scheduler.py", line ..., in run_forever exception>\t await self.run_backup(next_tags) -exception>\t File "/.../src/backy/tests/test_daemon.py", line ..., in failing_coroutine +exception>\t File "/.../src/backy/daemon/tests/test_daemon.py", line ..., in failing_coroutine exception>\t raise Exception() exception>\tException ... W test01[...] job/backoff backoff=120 -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-01 09:08:47' ... E test01[...] job/exception exception_class='builtins.Exception' exception_msg='' exception>\tTraceback (most recent call last): -exception>\t File "/.../src/backy/scheduler.py", line ..., in run_forever +exception>\t File "/.../src/backy/daemon/scheduler.py", line ..., in run_forever exception>\t await self.run_backup(next_tags) -exception>\t File "/.../src/backy/tests/test_daemon.py", line ..., in failing_coroutine +exception>\t File "/.../src/backy/daemon/tests/test_daemon.py", line ..., in failing_coroutine exception>\t raise Exception() exception>\tException ... W test01[...] job/backoff backoff=240 -... D test01[...] 
quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-01 09:10:47' ... E test01[...] job/exception exception_class='builtins.Exception' exception_msg='' exception>\tTraceback (most recent call last): -exception>\t File "/.../src/backy/scheduler.py", line ..., in run_forever +exception>\t File "/.../src/backy/daemon/scheduler.py", line ..., in run_forever exception>\t await self.run_backup(next_tags) -exception>\t File "/.../src/backy/tests/test_daemon.py", line ..., in failing_coroutine +exception>\t File "/.../src/backy/daemon/tests/test_daemon.py", line ..., in failing_coroutine exception>\t raise Exception() exception>\tException ... W test01[...] job/backoff backoff=480 -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-01 09:14:47' ... I test01[...] job/stop \n\ -... D test01[...] quarantine/scan entries=0 +... D test01[...] repo/scan-reports entries=0 ... I test01[...] job/waiting next_tags='daily' next_time='2015-09-02 07:32:51' """ ) @@ -421,16 +466,16 @@ def test_daemon_status_filter_re(daemon): async def test_purge_pending(daemon, monkeypatch): - run_purge = mock.Mock() - monkeypatch.setattr("backy.scheduler.Job.run_purge", run_purge) + run_gc = mock.Mock() + monkeypatch.setattr("backy.daemon.scheduler.Job.run_gc", run_gc) monkeypatch.setattr( "asyncio.sleep", mock.Mock(side_effect=asyncio.CancelledError()) ) - daemon.jobs["test01"].backup.set_purge_pending() + daemon.jobs["test01"].repository.set_purge_pending() del daemon.jobs["test01"] with pytest.raises(asyncio.CancelledError): await daemon.purge_pending_backups() - run_purge.assert_called_once() + run_gc.assert_called_once() diff --git a/src/backy/tests/test_scheduler.py b/src/backy/daemon/tests/test_scheduler.py similarity index 97% rename from src/backy/tests/test_scheduler.py rename to src/backy/daemon/tests/test_scheduler.py index 987ddef3..551a5221 100644 --- a/src/backy/tests/test_scheduler.py +++ b/src/backy/daemon/tests/test_scheduler.py @@ -4,7 +4,7 @@ import pytest import backy.utils -from backy.scheduler import Job +from backy.daemon.scheduler import Job @pytest.fixture diff --git a/src/backy/ext_deps.py b/src/backy/ext_deps.py index cc2ce7d9..51212d45 100644 --- a/src/backy/ext_deps.py +++ b/src/backy/ext_deps.py @@ -6,10 +6,18 @@ import os import sys -BACKY_CMD = os.environ.get( - "BACKY_CMD", +BACKY_CLI_CMD = os.environ.get( + "BACKY_CLI_CMD", os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), "backy"), ) +BACKY_RBD_CMD = os.environ.get( + "BACKY_RBD_CMD", + os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), "backy-rbd"), +) +BACKY_S3_CMD = os.environ.get( + "BACKY_S3_CMD", + os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), "backy-s3"), +) CP = os.environ.get("BACKY_CP", "cp") RBD = os.environ.get("BACKY_RBD", "rbd") BACKY_EXTRACT = os.environ.get("BACKY_EXTRACT", "backy-extract") diff --git a/src/backy/fallocate.py b/src/backy/fallocate.py deleted file mode 100644 index 691c1ab5..00000000 --- a/src/backy/fallocate.py +++ /dev/null @@ -1,66 +0,0 @@ -# Adapted from -# https://github.com/trbs/fallocate/issues/4 - -import ctypes -import ctypes.util -import os - -import structlog - -log = structlog.stdlib.get_logger() - -FALLOC_FL_KEEP_SIZE = 0x01 -FALLOC_FL_PUNCH_HOLE = 0x02 - - -def _fake_fallocate(fd, mode, offset, len_): - log.debug("fallocate-non-hole-punching") - if len_ <= 0: - raise 
IOError("fallocate: length must be positive") - if mode & FALLOC_FL_PUNCH_HOLE: - old = fd.tell() - fd.seek(offset) - fd.write(b"\x00" * len_) - fd.seek(old) - else: - raise NotImplementedError( - "fake fallocate() supports only hole punching" - ) - - -def _make_fallocate(): - libc_name = ctypes.util.find_library("c") - libc = ctypes.CDLL(libc_name, use_errno=True) - _fallocate = libc.fallocate - c_off_t = ctypes.c_size_t - _fallocate.restype = ctypes.c_int - _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t] - - def fallocate(fd, mode, offset, len_): - if len_ <= 0: - raise IOError("fallocate: length must be positive") - res = _fallocate(fd.fileno(), mode, offset, len_) - if res != 0: - errno = ctypes.get_errno() - raise OSError(errno, "fallocate: " + os.strerror(errno)) - - return fallocate - - -try: - fallocate = _make_fallocate() -except AttributeError: # pragma: no cover - fallocate = _fake_fallocate - - -def punch_hole(f, offset, len_): - """Ensure that the specified byte range is zeroed. - - Depending on the availability of fallocate(), this is either - delegated to the kernel or done manualy. - """ - params = (f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, offset, len_) - try: - fallocate(*params) - except OSError: - _fake_fallocate(*params) diff --git a/src/backy/file/__init__.py b/src/backy/file/__init__.py new file mode 100644 index 00000000..768c0cb8 --- /dev/null +++ b/src/backy/file/__init__.py @@ -0,0 +1,88 @@ +import shutil +import sys +import time +from argparse import _ActionsContainer +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Iterable + +from structlog.stdlib import BoundLogger + +from backy.revision import Revision +from backy.source import RestoreArgs, Source + +from ..repository import Repository + + +@dataclass(frozen=True) +class FileRestoreArgs(RestoreArgs): + target: Path + + def to_cmdargs(self) -> Iterable[str]: + return [str(self.target)] + + @classmethod + def from_args(cls, **kw: Any) -> "FileRestoreArgs": + return cls(kw["target"]) + + @classmethod + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: + restore_parser.add_argument( + "target", + type=Path, + metavar="TARGET", + help="Copy backed up revision to TARGET", + ) + + +class FileSource(Source[FileRestoreArgs]): + type_ = "file" + restore_type = FileRestoreArgs + + filename: Path # the source we are backing up + + def __init__(self, repository: Repository, filename: Path): + super().__init__(repository) + self.filename = filename + + @classmethod + def from_config( + cls, repository: Repository, config: dict[str, Any], log: BoundLogger + ) -> "FileSource": + assert config["type"] == "file" + return cls(repository, Path(config["filename"])) + + # def to_config(self) -> dict[str, Any]: + # return {"type": self.type_, "path": str(self.path)} + + def _path_for_revision(self, revision: Revision) -> Path: + return self.repository.path / revision.uuid + + def backup(self, revision: Revision): + backup = self._path_for_revision(revision) + assert not backup.exists() + start = time.time() + shutil.copy(self.filename, backup) + revision.stats["duration"] = time.time() - start + revision.write_info() + revision.readonly() + return True + + def restore(self, revision: Revision, args: FileRestoreArgs): + shutil.copy(self._path_for_revision(revision), args.target) + + def gc(self): + files = set(self.repository.path.glob("*.rev")) + expected_files = set( + (self.repository.path / r.uuid) + for r in self.repository.get_history() + ) + for 
file in files - expected_files: + file.unlink() + + def verify(self, revision: Revision): + assert self._path_for_revision(revision).exists() + + +def main(): + sys.exit(FileSource.main(*sys.argv)) diff --git a/src/backy/file/tests/test_file.py b/src/backy/file/tests/test_file.py new file mode 100644 index 00000000..da0f0927 --- /dev/null +++ b/src/backy/file/tests/test_file.py @@ -0,0 +1,53 @@ +from backy.file import FileRestoreArgs, FileSource +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule +from backy.source import CmdLineSource + + +def test_bootstrap_from_api(tmp_path, log): + original = tmp_path / "original.txt" + + schedule = Schedule() + repository = Repository(tmp_path / "repository", schedule, log) + repository.connect() + source = FileSource(repository, original) + + exercise_fresh_repo(source) + + +def test_bootstrap_from_config(tmp_path, log): + original = tmp_path / "original.txt" + + repo_path = tmp_path / "repository" + + conf = { + "path": repo_path, + "schedule": {}, + "source": {"type": "file", "filename": str(original)}, + } + + source = CmdLineSource.from_config(conf, log).create_source(FileSource) + + exercise_fresh_repo(source) + + +def exercise_fresh_repo(source: FileSource): + original = source.filename + + with open(original, "w") as f: + f.write("This is the original file.") + + revision = Revision.create( + source.repository, {"test"}, source.repository.log + ) + source.backup(revision) + + with open(original, "w") as f: + f.write("This is the wrong file.") + + assert original.read_text() == "This is the wrong file." + + source.restore(revision, FileRestoreArgs(original)) + + assert original.read_text() == "This is the original file." diff --git a/src/backy/logging.py b/src/backy/logging.py index 91bc7e7d..ccf30de6 100644 --- a/src/backy/logging.py +++ b/src/backy/logging.py @@ -3,7 +3,6 @@ # repository for complete details. 
import io -import os import string import sys from pathlib import Path @@ -215,7 +214,7 @@ def write(line): self._level_to_color[level] + level[0].upper() + RESET_ALL + " " ) - job_name = event_dict.pop("job_name", "") + job_name = event_dict.pop("job_name", "-") sub_taskid = event_dict.pop("sub_taskid", None) if sub_taskid: job_name += f"[{sub_taskid}]" @@ -232,10 +231,14 @@ def write(line): + RESET_ALL + " " ) - if len(subsystem + event) > self._pad_event and hasattr( - utils, "log_data" - ): - raise RuntimeWarning("logline to long: " + subsystem + event) + + test_mode = hasattr(utils, "log_data") + if test_mode and len(subsystem + event) > self._pad_event: + raise RuntimeWarning( + "subsystem and/or event names are too long: " + + subsystem + + event + ) logger_name = event_dict.pop("logger", None) if logger_name is not None: @@ -358,7 +361,6 @@ def init_logging( logfile: Optional[Path] = None, defaults: Optional[dict] = None, ): - console_file_renderer = ConsoleFileRenderer( min_level="trace" if verbose else "info", ) diff --git a/src/backy/main.py b/src/backy/main.py deleted file mode 100644 index ffb16ab2..00000000 --- a/src/backy/main.py +++ /dev/null @@ -1,630 +0,0 @@ -# -*- encoding: utf-8 -*- - -import argparse -import asyncio -import errno -import sys -from pathlib import Path -from typing import Literal, Optional - -import humanize -import structlog -import tzlocal -import yaml -from aiohttp import ClientConnectionError -from rich import print as rprint -from rich.table import Column, Table -from structlog.stdlib import BoundLogger - -import backy.daemon -from backy.utils import format_datetime_local, generate_taskid - -from . import logging -from .backup import Backup, RestoreBackend -from .client import APIClient, CLIClient - - -class Command(object): - """Proxy between CLI calls and actual backup code.""" - - path: Path - taskid: str - log: BoundLogger - - def __init__(self, path: Path, taskid, log: BoundLogger): - self.path = path - self.taskid = taskid - self.log = log - - def status(self, yaml_: bool, revision: str) -> None: - revs = Backup(self.path, self.log).find_revisions(revision) - if yaml_: - print(yaml.safe_dump([r.to_dict() for r in revs])) - return - total_bytes = 0 - - tz = tzlocal.get_localzone() - t = Table( - f"Date ({tz})", - "ID", - Column("Size", justify="right"), - Column("Duration", justify="right"), - "Tags", - "Trust", - "Server", - ) - - for r in revs: - total_bytes += r.stats.get("bytes_written", 0) - duration = r.stats.get("duration") - if duration: - duration = humanize.naturaldelta(duration) - else: - duration = "-" - - if r.pending_changes: - added = [f"+[on green]{t}[/]" for t in r.tags - r.orig_tags] - removed = [f"-[on red]{t}[/]" for t in r.orig_tags - r.tags] - same = list(r.orig_tags & r.tags) - tags = ",".join(added + removed + same) - else: - tags = ",".join(r.tags) - - t.add_row( - format_datetime_local(r.timestamp)[0], - r.uuid, - humanize.naturalsize( - r.stats.get("bytes_written", 0), binary=True - ), - duration, - tags, - r.trust.value, - f"[underline italic]{r.server}[/]" - if r.pending_changes - else r.server, - ) - - rprint(t) - - print( - "{} revisions containing {} data (estimated)".format( - len(revs), humanize.naturalsize(total_bytes, binary=True) - ) - ) - pending_changes = sum(1 for r in revs if r.pending_changes) - if pending_changes: - rprint( - f"[yellow]{pending_changes} pending change(s)[/] (Push changes with `backy push`)" - ) - - def backup(self, tags: str, force: bool) -> int: - b = Backup(self.path, self.log) - 
b._clean() - try: - tags_ = set(t.strip() for t in tags.split(",")) - success = b.backup(tags_, force) - return int(not success) - except IOError as e: - if e.errno not in [errno.EDEADLK, errno.EAGAIN]: - raise - self.log.warning("backup-already-running") - return 1 - finally: - b._clean() - - def restore( - self, revision: str, target: str, restore_backend: RestoreBackend - ) -> None: - b = Backup(self.path, self.log) - b.restore(revision, target, restore_backend) - - def find(self, revision: str, uuid: bool) -> None: - b = Backup(self.path, self.log) - for r in b.find_revisions(revision): - if uuid: - print(r.uuid) - else: - print(r.filename) - - def forget(self, revision: str) -> None: - b = Backup(self.path, self.log) - b.forget(revision) - b.warn_pending_changes() - - def scheduler(self, config: Path) -> None: - backy.daemon.main(config, self.log) - - def purge(self) -> None: - b = Backup(self.path, self.log) - b.purge() - - def upgrade(self) -> None: - b = Backup(self.path, self.log) - b.upgrade() - - def distrust(self, revision: str) -> None: - b = Backup(self.path, self.log) - b.distrust(revision) - - def verify(self, revision: str) -> None: - b = Backup(self.path, self.log) - b.verify(revision) - - def client( - self, - config: Path, - peer: str, - url: str, - token: str, - apifunc: str, - **kwargs, - ) -> int: - async def run() -> int: - if peer and (url or token): - self.log.error( - "client-argparse-error", - _fmt_msg="--peer conflicts with --url and --token", - ) - return 1 - if bool(url) ^ bool(token): - self.log.error( - "client-argparse-error", - _fmt_msg="--url and --token require each other", - ) - return 1 - if url and token: - api = APIClient("", url, token, self.taskid, self.log) - else: - d = backy.daemon.BackyDaemon(config, self.log) - d._read_config() - if peer: - if peer not in d.peers: - self.log.error( - "client-peer-unknown", - _fmt_msg="The peer {peer} is not known. Select a known peer or specify --url and --token.\n" - "The following peers are known: {known}", - peer=peer, - known=", ".join(d.peers.keys()), - ) - return 1 - api = APIClient.from_conf( - peer, d.peers[peer], self.taskid, self.log - ) - else: - if "token" not in d.api_cli_default: - self.log.error( - "client-missing-defaults", - _fmt_msg="The config file is missing default parameters. 
Please specify --url and --token", - ) - return 1 - api = APIClient.from_conf( - "", d.api_cli_default, self.taskid, self.log - ) - async with CLIClient(api, self.log) as c: - try: - await getattr(c, apifunc)(**kwargs) - except ClientConnectionError: - c.log.error("connection-error", exc_style="banner") - c.log.debug("connection-error", exc_info=True) - return 1 - return 0 - - return asyncio.run(run()) - - def tags( - self, - action: Literal["set", "add", "remove"], - autoremove: bool, - expect: Optional[str], - revision: str, - tags: str, - force: bool, - ) -> int: - tags_ = set(t.strip() for t in tags.split(",")) - if expect is None: - expect_ = None - else: - expect_ = set(t.strip() for t in expect.split(",")) - b = backy.backup.Backup(self.path, self.log) - success = b.tags( - action, - revision, - tags_, - expect=expect_, - autoremove=autoremove, - force=force, - ) - b.warn_pending_changes() - return int(not success) - - def expire(self) -> None: - b = backy.backup.Backup(self.path, self.log) - b.expire() - b.warn_pending_changes() - - def push(self, config: Path) -> int: - d = backy.daemon.BackyDaemon(config, self.log) - d._read_config() - b = backy.backup.Backup(self.path, self.log) - errors = asyncio.run(b.push_metadata(d.peers, self.taskid)) - return int(bool(errors)) - - def pull(self, config: Path) -> int: - d = backy.daemon.BackyDaemon(config, self.log) - d._read_config() - b = backy.backup.Backup(self.path, self.log) - errors = asyncio.run(b.pull_metadata(d.peers, self.taskid)) - return int(bool(errors)) - - -def setup_argparser(): - parser = argparse.ArgumentParser( - description="Backup and restore for block devices.", - ) - - parser.add_argument( - "-v", "--verbose", action="store_true", help="verbose output" - ) - parser.add_argument( - "-l", - "--logfile", - type=Path, - help=( - "file name to write log output in. 
" - "(default: /var/log/backy.log for `scheduler`, ignored for `client`, " - "$backupdir/backy.log otherwise)" - ), - ) - parser.add_argument( - "-b", - "--backupdir", - default=".", - type=Path, - help=( - "directory where backups and logs are written to " - "(default: %(default)s)" - ), - ) - parser.add_argument( - "-t", - "--taskid", - default=generate_taskid(), - help="id to include in log messages (default: 4 random base32 chars)", - ) - - subparsers = parser.add_subparsers() - - # CLIENT - client = subparsers.add_parser( - "client", - help="Query the api", - ) - g = client.add_argument_group() - g.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - g.add_argument("-p", "--peer", help="(default: read from config file)") - g = client.add_argument_group() - g.add_argument("--url") - g.add_argument("--token") - client.set_defaults(func="client") - client_parser = client.add_subparsers() - - # CLIENT jobs - p = client_parser.add_parser("jobs", help="List status of all known jobs") - p.add_argument( - "filter_re", - default="", - metavar="[filter]", - nargs="?", - help="Optional job filter regex", - ) - p.set_defaults(apifunc="jobs") - - # CLIENT status - p = client_parser.add_parser("status", help="Show job status overview") - p.set_defaults(apifunc="status") - - # CLIENT run - p = client_parser.add_parser( - "run", help="Trigger immediate run for one job" - ) - p.add_argument("job", metavar="", help="Name of the job to run") - p.set_defaults(apifunc="run") - - # CLIENT runall - p = client_parser.add_parser( - "runall", help="Trigger immediate run for all jobs" - ) - p.set_defaults(apifunc="runall") - - # CLIENT reload - p = client_parser.add_parser("reload", help="Reload the configuration") - p.set_defaults(apifunc="reload") - - # CLIENT check - p = client_parser.add_parser( - "check", - help="Check whether all jobs adhere to their schedules' SLA", - ) - p.set_defaults(apifunc="check") - - # BACKUP - p = subparsers.add_parser( - "backup", - help="Perform a backup", - ) - p.add_argument( - "-f", "--force", action="store_true", help="Do not validate tags" - ) - p.add_argument("tags", help="Tags to apply to the backup") - p.set_defaults(func="backup") - - # RESTORE - p = subparsers.add_parser( - "restore", - help="Restore (a given revision) to a given target", - ) - p.add_argument( - "--backend", - type=RestoreBackend, - choices=list(RestoreBackend), - default=RestoreBackend.AUTO, - dest="restore_backend", - help="(default: %(default)s)", - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="latest", - help="use revision SPEC as restore source (default: %(default)s)", - ) - p.add_argument( - "target", - metavar="TARGET", - help='Copy backed up revision to TARGET. Use stdout if TARGET is "-"', - ) - p.set_defaults(func="restore") - - # BACKUP - p = subparsers.add_parser( - "purge", - help="Purge the backup store (i.e. chunked) from unused data", - ) - p.set_defaults(func="purge") - - # FIND - p = subparsers.add_parser( - "find", - help="Print full path or uuid of specified revisions", - ) - p.add_argument( - "--uuid", - action="store_true", - help="Print uuid instead of full path", - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="latest", - help="use revision SPEC to find (default: %(default)s)", - ) - p.set_defaults(func="find") - - # STATUS - p = subparsers.add_parser( - "status", - help="Show backup status. 
Show inventory and summary information", - ) - p.add_argument("--yaml", dest="yaml_", action="store_true") - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="all", - help="use revision SPEC as filter (default: %(default)s)", - ) - p.set_defaults(func="status") - - # upgrade - p = subparsers.add_parser( - "upgrade", - help="Upgrade this backup (incl. its data) to the newest supported version", - ) - p.set_defaults(func="upgrade") - - # SCHEDULER DAEMON - p = subparsers.add_parser( - "scheduler", - help="Run the scheduler", - ) - p.set_defaults(func="scheduler") - p.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - - # DISTRUST - p = subparsers.add_parser( - "distrust", - help="Distrust specified revisions", - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="local", - help="use revision SPEC to distrust (default: %(default)s)", - ) - p.set_defaults(func="distrust") - - # VERIFY - p = subparsers.add_parser( - "verify", - help="Verify specified revisions", - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="trust:distrusted&local", - help="use revision SPEC to verify (default: %(default)s)", - ) - p.set_defaults(func="verify") - - # FORGET - p = subparsers.add_parser( - "forget", - help="Forget specified revision", - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - required=True, - help="use revision SPEC to forget", - ) - p.set_defaults(func="forget") - - # TAGS - p = subparsers.add_parser( - "tags", - help="Modify tags on revision", - ) - p.add_argument( - "--autoremove", - action="store_true", - help="Remove revision if no tags remain", - ) - p.add_argument( - "-f", "--force", action="store_true", help="Do not validate tags" - ) - p.add_argument( - "--expect", - metavar="", - help="Do nothing if tags differ from the expected tags", - ) - p.add_argument( - "action", - choices=["set", "add", "remove"], - ) - p.add_argument( - "-r", - "--revision", - metavar="SPEC", - default="all", - help="modify tags for revision SPEC, modifies all if not given (default: %(default)s)", - ) - p.add_argument( - "tags", - metavar="", - help="comma separated list of tags", - ) - p.set_defaults(func="tags") - - # EXPIRE - p = subparsers.add_parser( - "expire", - help="Expire tags according to schedule", - ) - p.set_defaults(func="expire") - - # PUSH - p = subparsers.add_parser( - "push", - help="Push pending changes to remote servers", - ) - p.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - p.set_defaults(func="push") - - # PULL - p = subparsers.add_parser( - "pull", - help="Push pending changes to remote servers", - ) - p.add_argument( - "-c", - "--config", - type=Path, - default="/etc/backy.conf", - help="(default: %(default)s)", - ) - p.set_defaults(func="pull") - - return parser, client - - -def main(): - parser, client_parser = setup_argparser() - args = parser.parse_args() - - if not hasattr(args, "func"): - parser.print_usage() - sys.exit(0) - if args.func == "client" and not hasattr(args, "apifunc"): - client_parser.print_usage() - sys.exit(0) - - default_logfile: Optional[Path] - match args.func: - case "scheduler": - default_logfile = Path("/var/log/backy.log") - case "client": - default_logfile = None - case _: - default_logfile = args.backupdir / "backy.log" - - match (args.func, vars(args).get("apifunc")): - case ("scheduler", _): - default_job_name = "-" - case ("client", "check"): - 
default_job_name = "-" - case _: - default_job_name = "" - - # Logging - logging.init_logging( - args.verbose, - args.logfile or default_logfile, - defaults={"job_name": default_job_name, "taskid": args.taskid}, - ) - log = structlog.stdlib.get_logger(subsystem="command") - log.debug("invoked", args=" ".join(sys.argv)) - - command = Command(args.backupdir, args.taskid, log) - func = getattr(command, args.func) - - # Pass over to function - func_args = dict(args._get_kwargs()) - del func_args["func"] - del func_args["verbose"] - del func_args["backupdir"] - del func_args["logfile"] - del func_args["taskid"] - - try: - log.debug("parsed", func=args.func, func_args=func_args) - ret = func(**func_args) - if isinstance(ret, int): - log.debug("return-code", code=ret) - sys.exit(ret) - log.debug("successful") - sys.exit(0) - except Exception: - log.exception("failed") - sys.exit(1) diff --git a/src/backy/quarantine.py b/src/backy/quarantine.py deleted file mode 100644 index 9f22c5a4..00000000 --- a/src/backy/quarantine.py +++ /dev/null @@ -1,106 +0,0 @@ -import datetime -import hashlib -import traceback -from pathlib import Path -from typing import List - -import shortuuid -import yaml -from structlog.stdlib import BoundLogger -from yaml import SafeDumper - -import backy -from backy.utils import SafeFile - - -class QuarantineReport: - uuid: str - source_chunk: bytes - source_hash: str - target_chunk: bytes - target_hash: str - offset: int - timestamp: datetime.datetime - traceback: str - - def __init__( - self, source_chunk: bytes, target_chunk: bytes, offset: int - ) -> None: - self.uuid = shortuuid.uuid() - self.source_chunk = source_chunk - self.source_hash = hashlib.md5(source_chunk).hexdigest() - self.target_chunk = target_chunk - self.target_hash = hashlib.md5(target_chunk).hexdigest() - self.offset = offset - self.timestamp = backy.utils.now() - self.traceback = "".join(traceback.format_stack()).strip() - - def to_dict(self) -> dict: - return { - "uuid": self.uuid, - "source_hash": self.source_hash, - "target_hash": self.target_hash, - "offset": self.offset, - "timestamp": self.timestamp, - "traceback": self.traceback, - } - - -class QuarantineStore: - path: Path - chunks_path: Path - report_ids: List[str] - log: BoundLogger - - def __init__(self, backup_path: Path, log: BoundLogger) -> None: - self.path = backup_path / "quarantine" - self.path.mkdir(exist_ok=True) - self.chunks_path = self.path / "chunks" - self.chunks_path.mkdir(exist_ok=True) - self.log = log.bind(subsystem="quarantine") - self.scan() - - def add_report(self, report: QuarantineReport) -> None: - self.log.info("add-report", uuid=report.uuid) - self._store_chunk(report.source_chunk, report.source_hash) - self._store_chunk(report.target_chunk, report.target_hash) - self._store_report(report) - - self.report_ids.append(report.uuid) - - def _store_report(self, report: QuarantineReport) -> None: - self.log.debug("store-report", uuid=report.uuid) - path = self.path / f"{report.uuid}.report" - if path.exists(): - self.log.debug("store-report-exists", uuid=report.uuid) - return - - class CustomDumper(SafeDumper): - pass - - def representer(dumper, data): - return dumper.represent_scalar( - "tag:yaml.org,2002:str", - data, - style="|" if len(data) > 100 else None, - ) - - yaml.add_representer(str, representer, Dumper=CustomDumper) - - with SafeFile(path, encoding="utf-8") as f: - f.open_new("wb") - yaml.dump(report.to_dict(), f, sort_keys=False, Dumper=CustomDumper) - - def _store_chunk(self, chunk: bytes, hash: str) -> None: - 
self.log.debug("store-chunk", hash=hash) - path = self.chunks_path / hash - if path.exists(): - self.log.debug("store-chunk-exists", hash=hash) - return - with SafeFile(path) as f: - f.open_new("wb") - f.write(chunk) - - def scan(self) -> None: - self.report_ids = [g.name for g in self.path.glob("*.report")] - self.log.debug("scan", entries=len(self.report_ids)) diff --git a/src/backy/rbd/__init__.py b/src/backy/rbd/__init__.py new file mode 100644 index 00000000..86d6a7ba --- /dev/null +++ b/src/backy/rbd/__init__.py @@ -0,0 +1,583 @@ +import json +import os +import subprocess +import sys +import time +import uuid +from argparse import _ActionsContainer +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import IO, Any, Callable, Iterable, Literal, Optional, Set, cast + +import consulate +from structlog.stdlib import BoundLogger + +import backy +import backy.utils +from backy.ext_deps import BACKY_EXTRACT +from backy.report import ChunkMismatchReport +from backy.repository import Repository +from backy.revision import Revision, Trust +from backy.source import RestoreArgs, Source +from backy.utils import ( + CHUNK_SIZE, + END, + TimeOut, + TimeOutError, + copy, + posix_fadvise, + report_status, +) + +from .chunked import BackendException, Chunk, File, Hash, Store +from .rbd import RBDClient + + +def locked(target: str, mode: Literal["shared", "exclusive"]): + return Repository.locked(target, mode, repo_attr="repository") + + +class RestoreBackend(Enum): + AUTO = "auto" + PYTHON = "python" + RUST = "rust" + + def __str__(self): + return self.value + + +@dataclass(frozen=True) +class RBDRestoreArgs(RestoreArgs): + target: str + backend: RestoreBackend = RestoreBackend.AUTO + + def to_cmdargs(self) -> Iterable[str]: + return ["--backend", self.backend.value, self.target] + + @classmethod + def from_args(cls, **kw: Any) -> "RBDRestoreArgs": + return cls(kw["target"], kw["restore_backend"]) + + @classmethod + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: + restore_parser.add_argument( + "--backend", + type=RestoreBackend, + choices=list(RestoreBackend), + default=RestoreBackend.AUTO, + dest="restore_backend", + help="(default: %(default)s)", + ) + restore_parser.add_argument( + "target", + metavar="TARGET", + help='Copy backed up revision to TARGET. Use stdout if TARGET is "-"', + ) + + +class RBDSource(Source[RBDRestoreArgs]): + type_ = "rbd" + restore_type = RBDRestoreArgs + + ceph_rbd: "CephRBD" + store: Store + log: BoundLogger + + def __init__( + self, repository: Repository, ceph_rbd: "CephRBD", log: BoundLogger + ): + super().__init__(repository) + self.log = log.bind(subsystem="rbdsource") + self.ceph_rbd = ceph_rbd + self.store = Store(repository.path / "chunks", self.log) + + @classmethod + def from_config( + cls, repository: Repository, config: dict[str, Any], log: BoundLogger + ) -> "RBDSource": + assert config["type"] == "rbd" + return cls(repository, CephRBD.from_config(config, log), log) + + def _path_for_revision(self, revision: Revision) -> Path: + return self.repository.path / revision.uuid + + def open( + self, + revision: Revision, + mode: str = "rb", + parent: Optional[Revision] = None, + ) -> File: + if "w" in mode or "+" in mode: + if parent and not self._path_for_revision(revision).exists(): + with self._path_for_revision(revision).open( + "wb" + ) as new, self._path_for_revision(parent).open("rb") as old: + # This is ok, this is just metadata, not the actual data. 
+ new.write(old.read()) + file = File(self._path_for_revision(revision), self.store, mode) + + if file.writable() and self.repository.contains_distrusted: + # "Force write"-mode if any revision is distrusted. + self.log.warn("forcing-full") + self.store.force_writes = True + + return file + + ################# + # Making backups + + @locked(target=".backup", mode="exclusive") + @locked(target=".purge", mode="shared") + def backup(self, revision: Revision) -> bool: + self.repository.path.joinpath("last").unlink(missing_ok=True) + self.repository.path.joinpath("last.rev").unlink(missing_ok=True) + + start = time.time() + + if not self.ceph_rbd.ready(): + raise RuntimeError( + "Source is not ready (does it exist? can you access it?)" + ) + + try: + with self.ceph_rbd(revision) as source: + parent_rev = source.get_parent() + with self.open(revision, "wb", parent_rev) as file: + if parent_rev: + source.diff(file, parent_rev) + else: + source.full(file) + with self.open(revision) as file: + verified = source.verify( + file, report=self.repository.add_report + ) + except BackendException: + self.log.exception("ceph-error-distrust-all") + verified = False + self.repository.distrust( + self.repository.find_revisions("local"), skip_lock=True + ) + if not verified: + self.log.error( + "verification-failed", + revision_uuid=revision.uuid, + ) + revision.remove() + else: + self.log.info("verification-ok", revision_uuid=revision.uuid) + revision.stats["duration"] = time.time() - start + revision.write_info() + revision.readonly() + # Switched from a fine-grained syncing mechanism to "everything + # once" when we're done. This is as safe but much faster. + os.sync() + + # If there are distrusted revisions, then perform at least one + # verification after a backup - for good measure and to keep things + # moving along automatically. This could also be moved into the + # scheduler. + self.repository.scan() + # TODO: move this to cli/daemon? + for revision in reversed( + self.repository.get_history(clean=True, local=True) + ): + if revision.trust == Trust.DISTRUSTED: + self.log.warning("inconsistent") + self.verify(revision, skip_lock=True) + break + return verified + + @locked(target=".purge", mode="shared") + @report_status + def verify(self, revision: Revision): + log = self.log.bind(revision_uuid=revision.uuid) + log.info("verify-start") + verified_chunks: Set[Hash] = set() + + # Load verified chunks to avoid duplicate work + for verified_revision in self.repository.get_history( + clean=True, local=True + ): + if verified_revision.trust != Trust.VERIFIED: + continue + verified_chunks.update( + self.open(verified_revision)._mapping.values() + ) + + log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks)) + + errors = False + # Go through all chunks and check them. Delete problematic ones. + f = self.open(revision) + hashes = set(f._mapping.values()) - verified_chunks + yield len(hashes) + 2 + for candidate in hashes: + yield + if candidate in verified_chunks: + continue + try: + c = Chunk(self.store, candidate) + c._read_existing() + except Exception: + log.exception("verify-error", chunk=candidate) + errors = True + try: + self.store.chunk_path(candidate).unlink(missing_ok=True) + except Exception: + log.exception("verify-remove-error", chunk=candidate) + # This is an optimisation: we can skip this revision, purge it + # and then keep verifying other chunks. This avoids checking + # things unnecessarily in duplicate. + # And we only mark it as verified if we never saw any problems. 
+ break + + yield + + # TODO: move this to cli/daemon? + if errors: + # Found any issues? Delete this revision as we can't trust it. + revision.remove() + else: + # No problems found - mark as verified. + revision.verify() + revision.write_info() + + yield + + # Purge to ensure that we don't leave unused, potentially untrusted + # stuff around, especially if this was the last revision. + self.gc(skip_lock=True) + + yield END + yield None + + @locked(target=".purge", mode="exclusive") + def gc(self) -> None: + self.log.debug("purge") + used_chunks: Set[Hash] = set() + # TODO: also remove mapping file + # TODO: purge quarantine store + for revision in self.repository.local_history: + used_chunks.update(self.open(revision)._mapping.values()) + self.store.purge(used_chunks) + # TODO: move this to cli/daemon? + self.repository.clear_purge_pending() + + ################# + # Restoring + + # This needs no locking as it's only a wrapper for restore_file and + # restore_stdout and locking isn't re-entrant. + def restore(self, revision: Revision, args: RBDRestoreArgs) -> None: + s = self.open(revision) + restore_backend = args.backend + if restore_backend == RestoreBackend.AUTO: + if self.backy_extract_supported(s): + restore_backend = RestoreBackend.RUST + else: + restore_backend = RestoreBackend.PYTHON + self.log.info("restore-backend", backend=restore_backend.value) + if restore_backend == RestoreBackend.PYTHON: + with s as source: + if args.target != "-": + self.restore_file(source, args.target) + else: + self.restore_stdout(source) + elif restore_backend == RestoreBackend.RUST: + self.restore_backy_extract(revision, args.target) + + def backy_extract_supported(self, file: "backy.rbd.chunked.File") -> bool: + log = self.log.bind(subsystem="backy-extract") + if file.size % CHUNK_SIZE != 0: + log.debug("not-chunk-aligned") + return False + try: + version = subprocess.check_output( + [BACKY_EXTRACT, "--version"], + encoding="utf-8", + errors="replace", + ) + if not version.startswith("backy-extract"): + log.debug("unknown-version") + return False + except Exception: + log.debug("unavailable") + return False + return True + + # backy-extract acquires lock + def restore_backy_extract(self, rev: Revision, target: str) -> None: + log = self.log.bind(subsystem="backy-extract") + cmd = [BACKY_EXTRACT, str(self.repository.path / rev.uuid), target] + log.debug("started", cmd=cmd) + proc = subprocess.Popen(cmd) + return_code = proc.wait() + log.info( + "finished", + return_code=return_code, + subprocess_pid=proc.pid, + ) + if return_code: + raise RuntimeError( + f"backy-extract failed with return code {return_code}. " + "Maybe try `--backend python`?" 
+ ) + + @locked(target=".purge", mode="shared") + def restore_file(self, source: IO, target_name: str) -> None: + """Bulk-copy from open revision `source` to target file.""" + self.log.debug("restore-file", source=source.name, target=target_name) + open(target_name, "ab").close() # touch into existence + with open(target_name, "r+b", buffering=CHUNK_SIZE) as target: + try: + posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore + except Exception: + pass + copy(source, target) + + @locked(target=".purge", mode="shared") + def restore_stdout(self, source: IO) -> None: + """Emit restore data to stdout (for pipe processing).""" + self.log.debug("restore-stdout", source=source.name) + try: + posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) # type: ignore + except Exception: + pass + with os.fdopen(os.dup(1), "wb") as target: + while True: + chunk = source.read(CHUNK_SIZE) + if not chunk: + break + target.write(chunk) + + +class CephRBD: + """The Ceph RBD source. + + Manages snapshots corresponding to revisions and provides a verification + that tries to balance reliability and performance. + """ + + pool: str + image: str + always_full: bool + vm: Optional[str] + consul_acl_token: Optional[str] + rbd: RBDClient + revision: Revision + log: BoundLogger + + snapshot_timeout = 90 + + def __init__( + self, + pool: str, + image: str, + log: BoundLogger, + vm: Optional[str] = None, + consul_acl_token: Optional[str] = None, + always_full: bool = False, + ): + self.pool = pool + self.image = image + self.always_full = always_full + self.vm = vm + self.consul_acl_token = consul_acl_token + self.log = log.bind(subsystem="ceph") + self.rbd = RBDClient(self.log) + + @classmethod + def from_config(cls, config: dict, log: BoundLogger) -> "CephRBD": + return cls( + config["pool"], + config["image"], + log, + config.get("vm"), + config.get("consul_acl_token"), + config.get("full-always", False), + ) + + def ready(self) -> bool: + """Check whether the source can be backed up. + + For RBD sources this means the volume exists and is accessible. + + """ + try: + if self.rbd.exists(self._image_name): + return True + except Exception: + self.log.exception("not-ready") + return False + + def __call__(self, revision): + self.revision = revision + return self + + def __enter__(self): + snapname = "backy-{}".format(self.revision.uuid) + self.create_snapshot(snapname) + return self + + def create_snapshot(self, name: str) -> None: + if not self.consul_acl_token or not self.vm: + self.rbd.snap_create(self._image_name + "@" + name) + return + + consul = consulate.Consul(token=self.consul_acl_token) + snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) + self.log.info( + "creating-snapshot", + snapshot_name=name, + snapshot_key=snapshot_key, + ) + + consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} + + time.sleep(3) + try: + timeout = TimeOut( + self.snapshot_timeout, interval=2, raise_on_timeout=True + ) + while timeout.tick(): + for snapshot in self.rbd.snap_ls(self._image_name): + if snapshot["name"] == name: + return + except TimeOutError: + # The VM might have been shut down. Try doing a regular Ceph + # snapshot locally. + self.rbd.snap_create(self._image_name + "@" + name) + except KeyboardInterrupt: + raise + finally: + # In case the snapshot still gets created: the general snapshot + # deletion code in ceph/source will clean up unused backy snapshots + # anyway. 
However, we need to work a little harder to delete old + # snapshot requests, otherwise we've sometimes seen those not + # getting deleted and then re-created all the time. + for key in list(consul.kv.find("snapshot/")): + try: + s = consul.kv[key] + except KeyError: + continue + try: + s = json.loads(s) + except json.decoder.JSONDecodeError: + # Clean up garbage. + self.log.warning( + "create-snapshot-removing-garbage-request", + snapshot_key=key, + ) + del consul.kv[key] + if s["vm"] != self.vm: + continue + # The knowledge about the `backy-` prefix isn't properly + # encapsulated here. + if s["snapshot"].startswith("backy-"): + self.log.info( + "create-snapshot-removing-request", + vm=s["vm"], + snapshot_name=s["snapshot"], + snapshot_key=key, + ) + del consul.kv[key] + + @property + def _image_name(self) -> str: + return "{}/{}".format(self.pool, self.image) + + def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): + self._delete_old_snapshots() + + def get_parent(self) -> Optional[Revision]: + if self.always_full: + self.log.info("backup-always-full") + return None + revision = self.revision + while True: + parent = revision.get_parent() + if not parent: + self.log.info("backup-no-valid-parent") + return None + if not self.rbd.exists(self._image_name + "@backy-" + parent.uuid): + self.log.info( + "ignoring-rev-without-snapshot", + revision_uuid=parent.uuid, + ) + revision = parent + continue + # Ok, it's trusted and we have a snapshot. Let's do a diff. + return parent + + def diff(self, target: File, parent: Revision) -> None: + self.log.info("diff") + snap_from = "backy-" + parent.uuid + snap_to = "backy-" + self.revision.uuid + s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) + with s as source: + source.integrate(target, snap_from, snap_to) + self.log.info("diff-integration-finished") + + def full(self, target: File) -> None: + self.log.info("full") + s = self.rbd.export( + "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) + ) + with s as source: + while buf := source.read(4 * backy.utils.MiB): + target.write(buf) + + def verify( + self, + target: File, + report: Callable[[ChunkMismatchReport], None] = lambda _: None, + ) -> bool: + s = self.rbd.image_reader( + "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) + ) + self.revision.stats["ceph-verification"] = "partial" + + with s as source: + self.log.info("verify") + return backy.utils.files_are_roughly_equal( + source, + cast(IO, target), + report=lambda s, t, o: report(ChunkMismatchReport(s, t, o)), + ) + + def _delete_old_snapshots(self) -> None: + # Clean up all snapshots except the one for the most recent valid + # revision. + # Previously we used to remove all snapshots but the one for this + # revision - which is wrong: broken new revisions would always cause + # full backups instead of new deltas based on the most recent valid + # one. 
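+        # Example: if the newest revision failed verification and was
+        # removed, the snapshot of the most recent *valid* revision stays
+        # around, so the next backup can still export a diff against it
+        # instead of being forced into a full export.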
+ # XXX this will break if multiple servers are active + if not self.always_full and self.revision.repository.local_history: + keep_snapshot_revision = self.revision.repository.local_history[ + -1 + ].uuid + else: + keep_snapshot_revision = None + for snapshot in self.rbd.snap_ls(self._image_name): + if not snapshot["name"].startswith("backy-"): + # Do not touch non-backy snapshots + continue + uuid = snapshot["name"].replace("backy-", "") + if uuid != keep_snapshot_revision: + time.sleep(3) # avoid race condition while unmapping + self.log.info( + "delete-old-snapshot", snapshot_name=snapshot["name"] + ) + try: + self.rbd.snap_rm(self._image_name + "@" + snapshot["name"]) + except Exception: + self.log.exception( + "delete-old-snapshot-failed", + snapshot_name=snapshot["name"], + ) + + +def main(): + sys.exit(RBDSource.main(*sys.argv)) diff --git a/src/backy/rbd/chunked/__init__.py b/src/backy/rbd/chunked/__init__.py new file mode 100644 index 00000000..2bd87626 --- /dev/null +++ b/src/backy/rbd/chunked/__init__.py @@ -0,0 +1,27 @@ +from typing import TypeAlias + +Hash: TypeAlias = str + + +class BackendException(IOError): + pass + + +class InconsistentHash(BackendException): + def __init__(self, expected, actual): + self.expected = expected + self.actual = actual + + +from .chunk import Chunk +from .file import File +from .store import Store + +__all__ = [ + "Chunk", + "File", + "Store", + "Hash", + "BackendException", + "InconsistentHash", +] diff --git a/src/backy/backends/chunked/chunk.py b/src/backy/rbd/chunked/chunk.py similarity index 80% rename from src/backy/backends/chunked/chunk.py rename to src/backy/rbd/chunked/chunk.py index 7939a336..04b9aa82 100644 --- a/src/backy/backends/chunked/chunk.py +++ b/src/backy/rbd/chunked/chunk.py @@ -2,27 +2,17 @@ import io import os import tempfile -from typing import Optional, Tuple, TypeAlias +from typing import TYPE_CHECKING, Optional, Tuple import lzo import mmh3 -import backy.backends.chunked -from backy.backends import BackendException from backy.utils import posix_fadvise -Hash: TypeAlias = str +from . import BackendException, Hash, InconsistentHash -chunk_stats = { - "write_full": 0, - "write_partial": 0, -} - - -class InconsistentHash(BackendException): - def __init__(self, expected, actual): - self.expected = expected - self.actual = actual +if TYPE_CHECKING: + from .store import Store class Chunk(object): @@ -36,19 +26,19 @@ class Chunk(object): CHUNK_SIZE = 4 * 1024**2 # 4 MiB chunks hash: Optional[Hash] - store: "backy.backends.chunked.Store" + store: "Store" clean: bool data: Optional[io.BytesIO] + stats: dict def __init__( - self, - store: "backy.backends.chunked.Store", - hash: Optional[Hash], + self, store: "Store", hash: Optional[Hash], stats: Optional[dict] = None ): self.hash = hash self.store = store self.clean = True self.data = None + self.stats = stats if stats is not None else dict() def _read_existing(self) -> None: if self.data: @@ -104,13 +94,15 @@ def write(self, offset: int, data: bytes) -> Tuple[int, bytes]: if offset == 0 and len(data) == self.CHUNK_SIZE: # Special case: overwrite the entire chunk. 
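+            # Overwriting a whole chunk never needs the previous contents,
+            # so the read is skipped here; the partial-write path below has
+            # to do a read-modify-write via _read_existing() first.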
self._init_data(data) - chunk_stats["write_full"] += 1 + self.stats.setdefault("write_full", 0) + self.stats["write_full"] += 1 else: self._read_existing() assert self.data self.data.seek(offset) self.data.write(data) - chunk_stats["write_partial"] += 1 + self.stats.setdefault("write_partial", 0) + self.stats["write_partial"] += 1 self.clean = False return len(data), remaining_data @@ -126,14 +118,11 @@ def flush(self) -> Optional[Hash]: # use a faster path to get the data. self.hash = hash(self.data.getvalue()) target = self.store.chunk_path(self.hash) - needs_forced_write = ( - self.store.force_writes and self.hash not in self.store.seen_forced - ) if self.hash not in self.store.seen: - if needs_forced_write or not target.exists(): - # Create the tempfile in the right directory to increase locality - # of our change - avoid renaming between multiple directories to - # reduce traffic on the directory nodes. + if self.store.force_writes or not target.exists(): + # Create the tempfile in the right directory to increase + # locality of our change - avoid renaming between multiple + # directories to reduce traffic on the directory nodes. fd, tmpfile_name = tempfile.mkstemp(dir=target.parent) posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore with os.fdopen(fd, mode="wb") as f: @@ -143,7 +132,6 @@ def flush(self) -> Optional[Hash]: # metadata flushes and then changing metadata again. os.chmod(tmpfile_name, 0o440) os.rename(tmpfile_name, target) - self.store.seen_forced.add(self.hash) self.store.seen.add(self.hash) self.clean = True return self.hash diff --git a/src/backy/backends/chunked/file.py b/src/backy/rbd/chunked/file.py similarity index 91% rename from src/backy/backends/chunked/file.py rename to src/backy/rbd/chunked/file.py index f17e37eb..ced489e0 100644 --- a/src/backy/backends/chunked/file.py +++ b/src/backy/rbd/chunked/file.py @@ -4,12 +4,13 @@ import os.path import time from collections import defaultdict -from typing import Optional, Tuple - -import backy.backends.chunked +from typing import TYPE_CHECKING, Optional, Tuple from .chunk import Chunk, Hash +if TYPE_CHECKING: + from backy.rbd.chunked import Store + class File(object): """A file like class that stores its data in 4MiB chunks @@ -32,9 +33,9 @@ class File(object): flush_target = 10 name: str - store: "backy.backends.chunked.Store" + store: "Store" + stats: dict closed: bool - overlay: bool size: int mode: str @@ -46,16 +47,16 @@ class File(object): def __init__( self, name: str | os.PathLike, - store: "backy.backends.chunked.Store", + store: "Store", mode: str = "rw", - overlay: bool = False, + stats: Optional[dict] = None, ): self.name = str(name) self.store = store + self.stats = stats if stats is not None else dict() self.closed = False # This indicates that writes should be temporary and no modify # the metadata when closing. 
- self.overlay = overlay self._position = 0 self._access_stats = defaultdict(lambda: (0, 0)) @@ -123,11 +124,10 @@ def flush(self) -> None: self._flush_chunks(0) - if not self.overlay: - with open(self.name, "w") as f: - json.dump({"mapping": self._mapping, "size": self.size}, f) - f.flush() - os.fsync(f) + with open(self.name, "w") as f: + json.dump({"mapping": self._mapping, "size": self.size}, f) + f.flush() + os.fsync(f) def close(self) -> None: assert not self.closed @@ -226,6 +226,8 @@ def writable(self) -> bool: def write(self, data: bytes) -> None: assert "w" in self.mode and not self.closed + self.stats.setdefault("bytes_written", 0) + self.stats["bytes_written"] += len(data) while data: chunk, _, offset = self._current_chunk() written, data = chunk.write(offset, data) @@ -239,7 +241,9 @@ def _current_chunk(self) -> Tuple[Chunk, int, int]: if chunk_id not in self._chunks: self._flush_chunks() self._chunks[chunk_id] = Chunk( - self.store, self._mapping.get(chunk_id) + self.store, + self._mapping.get(chunk_id), + self.stats.setdefault("chunk_stats", dict()), ) count = self._access_stats[chunk_id][0] self._access_stats[chunk_id] = (count + 1, time.time()) diff --git a/src/backy/backends/chunked/store.py b/src/backy/rbd/chunked/store.py similarity index 93% rename from src/backy/backends/chunked/store.py rename to src/backy/rbd/chunked/store.py index d60151a0..18d310bf 100644 --- a/src/backy/backends/chunked/store.py +++ b/src/backy/rbd/chunked/store.py @@ -3,7 +3,7 @@ from structlog.stdlib import BoundLogger -from backy.backends.chunked.chunk import Hash +from backy.rbd.chunked.chunk import Hash # A chunkstore, is responsible for all revisions for a single backup, for now. # We can start having statistics later how much reuse between images is @@ -22,17 +22,16 @@ class Store(object): force_writes = False path: Path - seen_forced: set[Hash] seen: set[Hash] log: BoundLogger def __init__(self, path: Path, log: BoundLogger): self.path = path - self.seen_forced = set() self.log = log.bind(subsystem="chunked-store") + self.path.mkdir(exist_ok=True) for x in range(256): subdir = self.path / f"{x:02x}" - subdir.mkdir(parents=True, exist_ok=True) + subdir.mkdir(exist_ok=True) if not self.path.joinpath("store").exists(): self.convert_to_v2() diff --git a/src/backy/rbd/chunked/tests/__init__.py b/src/backy/rbd/chunked/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/backy/backends/chunked/tests/test_chunk.py b/src/backy/rbd/chunked/tests/test_chunk.py similarity index 94% rename from src/backy/backends/chunked/tests/test_chunk.py rename to src/backy/rbd/chunked/tests/test_chunk.py index 0dc84868..6f2c2f0b 100644 --- a/src/backy/backends/chunked/tests/test_chunk.py +++ b/src/backy/rbd/chunked/tests/test_chunk.py @@ -3,9 +3,9 @@ import lzo import pytest -from backy.backends.chunked.chunk import Chunk, InconsistentHash, hash -from backy.backends.chunked.file import File -from backy.backends.chunked.store import Store +from backy.rbd.chunked.chunk import Chunk, InconsistentHash, hash +from backy.rbd.chunked.file import File +from backy.rbd.chunked.store import Store SPACE_CHUNK = b" " * Chunk.CHUNK_SIZE SPACE_CHUNK_HASH = "c01b5d75bfe6a1fa5bca6e492c5ab09a" @@ -96,8 +96,6 @@ def test_chunk_fails_wrong_content(tmp_path, log): with open(p, "wb") as existing: existing.write(lzo.compress(b"bsdf")) - f = File(tmp_path / "asdf", store) - chunk = Chunk(store, chunk_hash) with pytest.raises(InconsistentHash): chunk.read(0) diff --git 
a/src/backy/backends/chunked/tests/test_file.py b/src/backy/rbd/chunked/tests/test_file.py similarity index 97% rename from src/backy/backends/chunked/tests/test_file.py rename to src/backy/rbd/chunked/tests/test_file.py index adcd128e..586d9e70 100644 --- a/src/backy/backends/chunked/tests/test_file.py +++ b/src/backy/rbd/chunked/tests/test_file.py @@ -4,9 +4,9 @@ import lzo import pytest -from backy.backends.chunked.chunk import Chunk, InconsistentHash -from backy.backends.chunked.file import File -from backy.backends.chunked.store import Store +from backy.rbd.chunked.chunk import Chunk, InconsistentHash +from backy.rbd.chunked.file import File +from backy.rbd.chunked.store import Store def test_simple_open_write_read_seek(tmp_path, log): @@ -244,3 +244,6 @@ def test_rplus_and_append_positions(tmp_path, log): with File(tmp_path / "asdf", store) as f: assert f.read() == b"bsdfcsdf" + + +# TODO test bytes_written and chunk_stats diff --git a/src/backy/rbd/rbd.py b/src/backy/rbd/rbd.py new file mode 100644 index 00000000..f53cee1b --- /dev/null +++ b/src/backy/rbd/rbd.py @@ -0,0 +1,326 @@ +import contextlib +import json +import struct +import subprocess +from collections import namedtuple +from typing import IO, BinaryIO, Iterator, Optional + +from structlog.stdlib import BoundLogger + +from backy.ext_deps import RBD +from backy.utils import CHUNK_SIZE, punch_hole + + +def detect_whole_object_support(): + result = subprocess.run( + ["rbd", "help", "export-diff"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) + return "--whole-object" in result.stdout.decode("ascii") + + +try: + CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = detect_whole_object_support() +except Exception: + CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = False + + +class RBDClient(object): + log: BoundLogger + + def __init__(self, log: BoundLogger): + self.log = log.bind(subsystem="rbd") + + def _ceph_cli(self, cmdline, encoding="utf-8") -> str: + # This wrapper function for the `rbd` command is only used for getting + # and interpreting text messages, making this the correct level for + # managing text encoding. Other use cases where binary data is piped + # to rbd have their own dedicated wrappers. 
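+        # For instance, `_rbd(["snap", "ls", image], format="json")` below
+        # funnels through this wrapper: it runs the equivalent of
+        # `rbd snap ls <image> --format=json` and decodes the output before
+        # it is parsed as JSON.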
+ return subprocess.check_output( + cmdline, encoding=encoding, errors="replace" + ) + + def _rbd(self, cmd, format=None): + cmd = filter(None, cmd) + rbd = [RBD] + + rbd.extend(cmd) + + if format == "json": + rbd.append("--format=json") + + self.log.debug("executing-command", command=" ".join(rbd)) + result = self._ceph_cli(rbd) + + self.log.debug("executed-command", stdout=result) + if format == "json": + result = json.loads(result) + + return result + + def exists(self, snapspec: str): + try: + return self._rbd(["info", snapspec], format="json") + except subprocess.CalledProcessError as e: + if e.returncode == 2: + return False + raise + + def map(self, image: str, readonly=False): + def parse_mappings_pre_nautilus(mappings): + """The parser code for Ceph release Luminous and earlier.""" + for mapping in mappings.values(): + if image == "{pool}/{name}@{snap}".format(**mapping): + return mapping + raise RuntimeError("Map not found in mapping list.") + + def parse_mappings_since_nautilus(mappings): + """The parser code for Ceph release Nautilus and later.""" + for mapping in mappings: + if image == "{pool}/{name}@{snap}".format(**mapping): + return mapping + raise RuntimeError("Map not found in mapping list.") + + versionstring = self._rbd(["--version"]) + + self._rbd(["map", image, "--read-only" if readonly else ""]) + + mappings_raw = self._rbd(["showmapped"], format="json") + + if "nautilus" in versionstring: + mapping = parse_mappings_since_nautilus(mappings_raw) + elif "luminous" in versionstring: + mapping = parse_mappings_pre_nautilus(mappings_raw) + else: + # our jewel build provides no version info + # this will break with releases newer than nautilus + mapping = parse_mappings_pre_nautilus(mappings_raw) + + def scrub_mapping(mapping): + SPEC = {"pool", "name", "snap", "device"} + # Ensure all specced keys exist + for key in SPEC: + if key not in mapping: + raise KeyError( + f"Missing key `{key}` in mapping {mapping!r}" + ) + # Scrub all non-specced keys + for key in list(mapping): + if key not in SPEC: + del mapping[key] + return mapping + + return scrub_mapping(mapping) + + def unmap(self, device): + self._rbd(["unmap", device]) + + def snap_create(self, image): + self._rbd(["snap", "create", image]) + + def snap_ls(self, image): + return self._rbd(["snap", "ls", image], format="json") + + def snap_rm(self, image): + return self._rbd(["snap", "rm", image]) + + @contextlib.contextmanager + def export_diff(self, new: str, old: str) -> Iterator["RBDDiffV1"]: + self.log.info("export-diff") + if CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF: + EXPORT_WHOLE_OBJECT = ["--whole-object"] + else: + EXPORT_WHOLE_OBJECT = [] + proc = subprocess.Popen( + [RBD, "export-diff", new, "--from-snap", old] + + EXPORT_WHOLE_OBJECT + + ["-"], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + # Have a rather largish buffer size, so rbd has some room to + # push its data to, when we are busy writing. 
+ bufsize=8 * CHUNK_SIZE, + ) + assert proc.stdout is not None + try: + yield RBDDiffV1(proc.stdout) + finally: + proc.stdout.close() + proc.wait() + + @contextlib.contextmanager + def image_reader(self, image: str) -> Iterator[BinaryIO]: + mapped = self.map(image, readonly=True) + source = open(mapped["device"], "rb", buffering=CHUNK_SIZE) + try: + yield source + finally: + source.close() + self.unmap(mapped["device"]) + + @contextlib.contextmanager + def export(self, image: str) -> Iterator[IO]: + self.log.info("export") + proc = subprocess.Popen( + [RBD, "export", image, "-"], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + # Have a rather largish buffer size, so rbd has some room to + # push its data to, when we are busy writing. + bufsize=4 * CHUNK_SIZE, + ) + assert proc.stdout is not None + try: + yield proc.stdout + finally: + proc.stdout.close() + proc.wait() + + +def unpack_from(fmt, f): + size = struct.calcsize(fmt) + b = f.read(size) + return struct.unpack(fmt, b) + + +Zero = namedtuple("Zero", ["start", "length"]) +Data = namedtuple("Data", ["start", "length", "stream"]) +SnapSize = namedtuple("SnapSize", ["size"]) +FromSnap = namedtuple("FromSnap", ["snapshot"]) +ToSnap = namedtuple("ToSnap", ["snapshot"]) + + +class RBDDiffV1(object): + f: IO + phase: str # header, metadata, data + record_type: Optional[str] + _streaming: bool + + header = b"rbd diff v1\n" + + def __init__(self, fh): + # self.filename = filename + self.f = fh + + self.phase = "header" + self.read_header() + self.record_type = None + self._streaming = False + + def read_header(self): + assert self.phase == "header" + header = self.f.read(len(self.header)) + if header != self.header: + raise ValueError("Unexpected header: {0!r}".format(header)) + self.phase = "metadata" + + def read_record(self): + if self.phase == "end": + return + assert not self._streaming, "Unread data from read_w. Consume first." + last_record_type = self.record_type + self.record_type = self.f.read(1).decode("ascii") + if self.record_type not in ["f", "t", "s", "w", "z", "e"]: + raise ValueError( + 'Got invalid record type "{}". Previous record: {}'.format( + self.record_type, last_record_type + ) + ) + method = getattr(self, "read_{}".format(self.record_type)) + return method() + + def read_fbytes(self, encoding=None): + length = unpack_from(""], + ), + ( + ["backup", "asdf"], + 1, + 0, + [""], + ), + ( + ["restore", "asdf", "out.img"], + None, + 0, + [ + "", + "RBDRestoreArgs(target='out.img', backend=)", + ], + ), + ( + ["restore", "asdf", "--backend", "python", "out.img"], + None, + 0, + [ + "", + "RBDRestoreArgs(target='out.img', backend=)", + ], + ), + (["gc"], None, 0, []), + ( + ["verify", "asdf"], + None, + 0, + [""], + ), + ], +) +def test_call_fun( + args, + rv, + rc, + params, + source_on_disk, + tmp_path, + capsys, + monkeypatch, + log, +): + os.chdir(tmp_path) + + Revision(source_on_disk.repository, log, uuid="asdf").materialize() + + monkeypatch.setattr( + backy.rbd.RBDSource, + args[0], + partialmethod(print_args, return_value=rv), + ) + utils.log_data = "" + exit = RBDSource.main( + "backy-rbd", "-v", "-C", str(source_on_disk.repository.path), *args + ) + assert exit == rc + out, err = capsys.readouterr() + assert ( + Ellipsis( + f"""\ +{", ".join(["", *params])} +{{}} +""" + ) + == out + ) + assert ( + Ellipsis( + f"""\ +... D - command/invoked args='backy-rbd -v -C ... {" ".join([ *args])}' +... D test01 repo/scan-reports entries=0 +... 
D - command/return-code code={rc} +""" + ) + == utils.log_data + ) + + +def test_call_unexpected_exception( + capsys, source_on_disk, monkeypatch, log, tmp_path +): + def do_raise(*args, **kw): + raise RuntimeError("test") + + monkeypatch.setattr(backy.rbd.RBDSource, "gc", do_raise) + import os + + monkeypatch.setattr(os, "_exit", lambda x: None) + + utils.log_data = "" + exit = RBDSource.main( + "backy-rbd", "-C", str(source_on_disk.repository.path), "gc" + ) + assert exit == 1 + out, err = capsys.readouterr() + assert "" == out + assert ( + Ellipsis( + """\ +... D - command/invoked args='backy-rbd -C ... gc' +... D test01 repo/scan-reports entries=0 +... E - command/failed exception_class='builtins.RuntimeError' exception_msg='test' +exception>\tTraceback (most recent call last): +exception>\t File ".../src/backy/source.py", line ..., in main +exception>\t source.gc() +exception>\t File ".../src/backy/rbd/tests/test_main.py", line ..., in do_raise +exception>\t raise RuntimeError("test") +exception>\tRuntimeError: test +""" + ) + == utils.log_data + ) diff --git a/src/backy/sources/ceph/tests/test_rbd.py b/src/backy/rbd/tests/test_rbd.py similarity index 97% rename from src/backy/sources/ceph/tests/test_rbd.py rename to src/backy/rbd/tests/test_rbd.py index 1b0033b3..fb80819f 100644 --- a/src/backy/sources/ceph/tests/test_rbd.py +++ b/src/backy/rbd/tests/test_rbd.py @@ -1,13 +1,11 @@ -import os import subprocess from unittest import mock import pytest -import backy.sources.ceph from backy.ext_deps import RBD -from backy.sources.ceph.diff import RBDDiffV1 -from backy.sources.ceph.rbd import RBDClient +from backy.rbd import RBDClient +from backy.rbd.rbd import RBDDiffV1 @mock.patch("subprocess.check_output") diff --git a/src/backy/rbd/tests/test_source.py b/src/backy/rbd/tests/test_source.py new file mode 100644 index 00000000..1bf749da --- /dev/null +++ b/src/backy/rbd/tests/test_source.py @@ -0,0 +1,307 @@ +import os +import subprocess +from pathlib import Path +from typing import IO +from unittest import mock + +import pytest + +from backy.conftest import create_rev +from backy.ext_deps import BACKY_RBD_CMD, BASH +from backy.rbd import CephRBD, RBDRestoreArgs, RBDSource +from backy.source import CmdLineSource +from backy.tests import Ellipsis +from backy.utils import CHUNK_SIZE + + +class FakeCephRBD: + data = "" + + def __init__(self, data): + self.data = data + + def ready(self): + return bool(self.data) + + def __call__(self, *args, **kwargs): + return self + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def get_parent(self): + return None + + def full(self, file): + assert self.data + file.write(self.data) + + def verify(self, target: IO, report=None): + return self.data == target.read() + + +@pytest.fixture +def rbdsource(repository, log): + return RBDSource(repository, FakeCephRBD(b""), log) + + +def test_configure_rbd_source_no_consul(repository, tmp_path, log): + config = { + "path": str(tmp_path), + "schedule": {}, + "source": { + "type": "rbd", + "pool": "test", + "image": "test04.root", + }, + } + source = CmdLineSource.from_config(config, log).create_source() + assert isinstance(source, RBDSource) + ceph_rbd = source.ceph_rbd + assert isinstance(ceph_rbd, CephRBD) + assert ceph_rbd.pool == "test" + assert ceph_rbd.image == "test04.root" + assert ceph_rbd.always_full is False + assert ceph_rbd.vm is None + assert ceph_rbd.consul_acl_token is None + + +def test_configure_rbd_source_consul(repository, tmp_path, log): + config 
= { + "path": str(tmp_path), + "schedule": {}, + "source": { + "type": "rbd", + "pool": "test", + "image": "test04.root", + "full-always": True, + "vm": "test04", + "consul_acl_token": "token", + }, + } + source = CmdLineSource.from_config(config, log).create_source() + assert isinstance(source, RBDSource) + ceph_rbd = source.ceph_rbd + assert isinstance(ceph_rbd, CephRBD) + assert ceph_rbd.pool == "test" + assert ceph_rbd.image == "test04.root" + assert ceph_rbd.always_full is True + assert ceph_rbd.vm == "test04" + assert ceph_rbd.consul_acl_token == "token" + + +def test_restore_target(rbdsource, repository, tmp_path, log): + data = b"volume contents\n" + rbdsource.ceph_rbd.data = data + target = tmp_path / "restore.img" + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + rbdsource.restore(r, RBDRestoreArgs(str(target))) + with open(target, "rb") as t: + assert data == t.read() + + +def test_restore_stdout(rbdsource, repository, capfd, log): + data = b"volume contents\n" + rbdsource.ceph_rbd.data = data + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + rbdsource.restore(r, RBDRestoreArgs("-")) + assert not Path("-").exists() + out, err = capfd.readouterr() + assert data.decode("utf-8") == out + + +def test_restore_backy_extract(rbdsource, repository, monkeypatch, log): + check_output = mock.Mock(return_value="backy-extract 1.1.0") + monkeypatch.setattr(subprocess, "check_output", check_output) + rbdsource.restore_backy_extract = mock.Mock() + data = b"a" * CHUNK_SIZE + rbdsource.ceph_rbd.data = data + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + rbdsource.restore(r, RBDRestoreArgs("restore.img")) + check_output.assert_called() + rbdsource.restore_backy_extract.assert_called_once_with(r, "restore.img") + + +def test_backup_corrupted(rbdsource, repository, log): + data = b"volume contents\n" + rbdsource.ceph_rbd.data = data + r = create_rev(repository, {"daily"}) + rbdsource.backup(r) + + chunk_path = rbdsource.store.chunk_path(next(iter(rbdsource.store.seen))) + chunk_path.chmod(0o664) + with open(chunk_path, "wb") as f: + f.write(b"invalid") + r2 = create_rev(repository, {"daily"}) + rbdsource.backup(r2) + + assert repository.history == [] + assert not chunk_path.exists() + + +def test_gc(rbdsource, repository, log): + r = create_rev(repository, set()) + # Write 1 version to the file + with rbdsource.open(r, "wb") as f: + f.write(b"asdf") + remote = create_rev(repository, set()) # remote revision without local data + remote.server = "remote" + remote.materialize() + + # Reassign as the scan will create a new reference + r = repository.find_by_uuid(r.uuid) + assert len(list(rbdsource.store.ls())) == 1 + rbdsource.gc() + assert len(list(rbdsource.store.ls())) == 1 + r.remove() + rbdsource.gc() + assert len(list(rbdsource.store.ls())) == 0 + + +def test_smoketest_internal(rbdsource, repository, tmp_path, log): + # These copies of data are intended to be different versions of the same + # file. 
+ data1 = b"1" * 2 * 1024**2 + data2 = b"2" * 2 * 1024**2 + data3 = b"3" * 2 * 1024**2 + + # Backup first state + rbdsource.ceph_rbd.data = data1 + rev1 = create_rev(repository, {"manual:test"}) + rbdsource.backup(rev1) + + # Restore first state from the newest revision + restore_args = RBDRestoreArgs(str(tmp_path / "image1.restore")) + rbdsource.restore(rev1, restore_args) + with pytest.raises(IOError): + open(repository.history[-1].info_filename, "wb") + assert data1 == open(restore_args.target, "rb").read() + + # Backup second state + rbdsource.ceph_rbd.data = data2 + rev2 = create_rev(repository, {"test"}) + rbdsource.backup(rev2) + assert len(repository.history) == 2 + + # Restore second state from second backup which is the newest at position 0 + rbdsource.restore(rev2, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Our original backup is now at position 1. Lets restore that again. + rbdsource.restore(rev1, restore_args) + assert data1 == open(restore_args.target, "rb").read() + + # Backup second state again + rbdsource.ceph_rbd.data = data2 + rev3 = create_rev(repository, {"manual:test"}) + rbdsource.backup(rev3) + assert len(repository.history) == 3 + + # Restore image2 from its most recent at position 0 + rbdsource.restore(rev3, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Restore image2 from its previous backup, now at position 1 + rbdsource.restore(rev2, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Our original backup is now at position 2. Lets restore that again. + rbdsource.restore(rev1, restore_args) + assert data1 == open(restore_args.target, "rb").read() + + # Backup third state + rbdsource.ceph_rbd.data = data3 + rev4 = create_rev(repository, {"test"}) + rbdsource.backup(rev4) + assert len(repository.history) == 4 + + # Restore image3 from the most curent state + rbdsource.restore(rev4, restore_args) + assert data3 == open(restore_args.target, "rb").read() + + # Restore image2 from position 1 and 2 + rbdsource.restore(rev3, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + rbdsource.restore(rev2, restore_args) + assert data2 == open(restore_args.target, "rb").read() + + # Restore image1 from position 3 + rbdsource.restore(rev1, restore_args) + assert data1 == open(restore_args.target, "rb").read() + + +@pytest.mark.slow +@pytest.mark.skip +def test_smoketest_external(): + output = subprocess.check_output( + [BASH, Path(__file__).parent / "smoketest.sh"], + env=os.environ | {"BACKY_RBD_CMD": BACKY_RBD_CMD}, + ) + output = output.decode("utf-8") + assert ( + Ellipsis( + """\ +Using /... as workspace. +Generating Test Data.. Done. +Backing up img_state1.img. Done. +Backing up img_state1.img with unknown tag. Done. +Restoring img_state1.img from level 0. Done. +Diffing restore_state1.img against img_state1.img. Success. +Backing up img_state2.img. Done. +Restoring img_state2.img from level 0. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state1.img from level 1. Done. +Diffing restore_state1.img against img_state1.img. Success. +Backing up img_state2.img again. Done. +Restoring img_state2.img from level 0. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state2.img from level 1. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state1.img from level 2. Done. +Diffing restore_state1.img against img_state1.img. Success. +Backing up img_state3.img. Done. 
+Restoring img_state3.img from level 0. Done. +Diffing restore_state3.img against img_state3.img. Success. +Restoring img_state2.img from level 1. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state2.img from level 2. Done. +Diffing restore_state2.img against img_state2.img. Success. +Restoring img_state1.img from level 3. Done. +Diffing restore_state1.img against img_state1.img. Success. +┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ +┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ +┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ +┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ daily │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ +4 revisions containing 2.0 MiB data (estimated) +┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ +┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ +┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ +┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ +│ ... │ │ │ │ │ │ │ +│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ +│ ... │ │ │ │ │ │ │ +└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ +3 revisions containing 1.5 MiB data (estimated) +""" + ) + == output + ) diff --git a/src/backy/report.py b/src/backy/report.py new file mode 100644 index 00000000..17144c6f --- /dev/null +++ b/src/backy/report.py @@ -0,0 +1,109 @@ +import datetime +import hashlib +import traceback +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Optional + +import shortuuid +import yaml +from structlog.stdlib import BoundLogger +from yaml import SafeDumper + +import backy.utils +from backy.utils import SafeFile + + +class ProblemReport(ABC): + uuid: str + timestamp: datetime.datetime + + def __init__( + self, + uuid: Optional[str] = None, + timestamp: Optional[datetime.datetime] = None, + ): + self.uuid = uuid or shortuuid.uuid() + self.timestamp = timestamp or backy.utils.now() + + def to_dict(self) -> dict: + return { + "uuid": self.uuid, + "timestamp": self.timestamp, + } + + @abstractmethod + def get_message(self) -> str: + ... 
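+    # A report is handed to Repository.add_report(), which calls store()
+    # below with the repository's report directory; e.g. a
+    # ChunkMismatchReport ends up as <repository>/quarantine/<uuid>.report,
+    # with the two mismatching chunks saved under quarantine/chunks/.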
+ + def store(self, dir: Path, log: BoundLogger) -> None: + log.debug("store-report", uuid=self.uuid) + path = dir / f"{self.uuid}.report" + if path.exists(): + log.debug("store-report-exists", uuid=self.uuid) + return + + class CustomDumper(SafeDumper): + pass + + def representer(dumper, data): + return dumper.represent_scalar( + "tag:yaml.org,2002:str", + data, + style="|" if len(data) > 100 else None, + ) + + yaml.add_representer(str, representer, Dumper=CustomDumper) + + with SafeFile(path, encoding="utf-8") as f: + f.open_new("wb") + yaml.dump(self.to_dict(), f, sort_keys=False, Dumper=CustomDumper) + + +class ChunkMismatchReport(ProblemReport): + source_chunk: bytes + source_hash: str + target_chunk: bytes + target_hash: str + offset: int + traceback: str + + def __init__(self, source_chunk: bytes, target_chunk: bytes, offset: int): + super().__init__() + self.source_chunk = source_chunk + self.target_chunk = target_chunk + self.offset = offset + self.source_hash = hashlib.md5(self.source_chunk).hexdigest() + self.target_hash = hashlib.md5(self.target_chunk).hexdigest() + self.traceback = "".join(traceback.format_stack()).strip() + + def to_dict(self) -> dict: + return super().to_dict() | { + "source_hash": self.source_hash, + "target_hash": self.target_hash, + "offset": self.offset, + "traceback": self.traceback, + } + + def get_message(self) -> str: + return f"Mismatching chunks at offset {self.offset}" + + def store(self, dir: Path, log: BoundLogger) -> None: + chunks_path = dir / "chunks" + chunks_path.mkdir(exist_ok=True) + self._store_chunk(chunks_path, self.source_chunk, self.source_hash, log) + self._store_chunk(chunks_path, self.target_chunk, self.target_hash, log) + super().store(dir, log) + + @staticmethod + def _store_chunk( + dir: Path, chunk: bytes, hash: str, log: BoundLogger + ) -> None: + log.debug("store-chunk", hash=hash) + path = dir / hash + if path.exists(): + log.debug("store-chunk-exists", hash=hash) + return + with SafeFile(path) as f: + f.open_new("wb") + f.write(chunk) diff --git a/src/backy/repository.py b/src/backy/repository.py new file mode 100644 index 00000000..25137629 --- /dev/null +++ b/src/backy/repository.py @@ -0,0 +1,553 @@ +import contextlib +import datetime +import fcntl +import re +from math import ceil, floor +from pathlib import Path +from typing import IO, Any, Iterable, List, Literal, Optional, TypedDict + +import tzlocal +from structlog.stdlib import BoundLogger + +import backy +from backy.utils import ( + duplicates, + list_get, + list_rindex, + list_split, + min_date, + unique, +) + +from .report import ProblemReport +from .revision import Revision, Trust, filter_schedule_tags +from .schedule import Schedule + + +class StatusDict(TypedDict): + job: str + sla: str + sla_overdue: int + status: str + last_time: Optional[datetime.datetime] + last_tags: Optional[str] + last_duration: Optional[float] + next_time: Optional[datetime.datetime] + next_tags: Optional[str] + manual_tags: str + problem_reports: int + unsynced_revs: int + local_revs: int + + +class Repository(object): + """A repository stores and manages backups for a single source. + + The repository handles metadata information around backups, manages the + schedule and tags and can expire revisions. + + A single backup for something (an RBD disk image, an S3 pool of + buckets, ...) is called a revision and thus we use "backup" synomymously + with "revision". 
+ + The actual implementation of making and restoring backups as well as + storing the data is provided by the `source` implementations. + + """ + + path: Path + report_path: Path + schedule: Schedule + history: List[Revision] + report_ids: List[str] + log: BoundLogger + + _by_uuid: dict[str, Revision] + _lock_fds: dict[str, IO] + + def __init__( + self, + path: Path, + schedule: Schedule, + log: BoundLogger, + ): + self.path = path.resolve() + self.report_path = self.path / "quarantine" + self.schedule = schedule + self.log = log.bind(subsystem="repo", job_name=self.name) + self._lock_fds = {} + + def connect(self): + self.path.mkdir(exist_ok=True) + self.report_path.mkdir(exist_ok=True) + self.scan() + self.scan_reports() + + def to_dict(self) -> dict[str, Any]: + return { + "schedule": self.schedule.to_dict(), + "path": str(self.path), + } + + def add_report(self, report: ProblemReport) -> None: + self.log.info("add-report", uuid=report.uuid) + report.store(self.report_path, self.log) + self.report_ids.append(report.uuid) + + def scan_reports(self) -> None: + self.report_ids = [ + g.name.removesuffix(".report") + for g in self.report_path.glob("*.report") + ] + self.log.debug("scan-reports", entries=len(self.report_ids)) + + @property + def sla(self) -> bool: + """Is the SLA currently held? + + The SLA being held is only reflecting the current status. + + It does not help to reflect on past situations that have failed as + those are not indicators whether and admin needs to do something + right now. + """ + return not self.sla_overdue + + @property + def sla_overdue(self) -> int: + """Amount of time the SLA is currently overdue.""" + if not self.clean_history: + return 0 + age = backy.utils.now() - self.clean_history[-1].timestamp + max_age = min(x["interval"] for x in self.schedule.schedule.values()) + if age > max_age * 1.5: + return age.total_seconds() + return 0 + + # Locking strategy: + # + # - You can only run one backup of a machine at a time, as the backup will + # interact with this machines' list of snapshots and will get confused + # if run in parallel. + # - You can restore while a backup is running. + # - You can only purge while nothing else is happening. + # - Trying to get a shared lock (specifically purge) will block and wait + # whereas trying to get an exclusive lock (running backups, purging) will + # immediately give up. + # - Locking is not re-entrant. It's forbidden and protected to call another + # locking main function. + + @staticmethod + def locked( + target: str, + mode: Literal["shared", "exclusive"], + repo_attr: Optional[str] = None, + ): + def wrap(f): + def locked_function(self, *args, skip_lock=False, **kw): + if skip_lock: + return f(self, *args, **kw) + if repo_attr: + repo = getattr(self, repo_attr) + else: + repo = self + with repo.lock(target, mode, f.__name__): + return f(self, *args, **kw) + + locked_function.__name__ = "locked({}, {})".format( + f.__qualname__, target + ) + return locked_function + + return wrap + + @contextlib.contextmanager + def lock( + self, + target: str, + mode: Literal["shared", "exclusive"], + logname="", + ): + if mode == "shared": + mode_ = fcntl.LOCK_SH + elif mode == "exclusive": + mode_ = fcntl.LOCK_EX | fcntl.LOCK_NB + else: + raise ValueError("Unknown lock mode '{}'".format(mode)) + + if ( + target in self._lock_fds + ): # FIXME: should this be a class var? 
dict(path->lock) + raise RuntimeError("Bug: Locking is not re-entrant.") + target_path = self.path / target + if not target_path.exists(): + target_path.touch() + self._lock_fds[target] = target_path.open() + try: + fcntl.flock(self._lock_fds[target], mode_) + except BlockingIOError: + self.log.warning( + "lock-failed", + _fmt_msg="Failed to get '{mode}' lock on '{target}' for '{function}'.", + mode=mode, + target=target, + function=logname, + ) + raise + else: + try: + yield + finally: + fcntl.flock(self._lock_fds[target], fcntl.LOCK_UN) + finally: + self._lock_fds[target].close() + del self._lock_fds[target] + + @property + def name(self) -> str: + return self.path.name + + def scan(self) -> None: + self.history = [] + self._by_uuid = {} + for f in self.path.glob("*.rev"): + if f.is_symlink(): + # Ignore links that are used to create readable pointers + continue + r = Revision.load(f, self, self.log) + if r.uuid not in self._by_uuid: + self._by_uuid[r.uuid] = r + self.history.append(r) + # The history is stored: oldest first. newest last. + self.history.sort(key=lambda r: r.timestamp) + + def touch(self): + self.path.touch() + + def set_purge_pending(self): + self.path.joinpath(".purge_pending").touch() + + def clear_purge_pending(self): + self.path.joinpath(".purge_pending").unlink(missing_ok=True) + + def get_history( + self, *, clean: bool = False, local: bool = False + ) -> list[Revision]: + return [ + rev + for rev in self.history + if (not clean or "duration" in rev.stats) + and (not local or not rev.server) + ] + + @property + def clean_history(self) -> List[Revision]: + """History without incomplete revisions.""" + return self.get_history(clean=True) + + @property + def local_history(self): + """History without incomplete revisions.""" + return self.get_history(local=True) + + @property + def contains_distrusted(self) -> bool: + return any( + ( + r == Trust.DISTRUSTED + for r in self.get_history(clean=True, local=True) + ) + ) + + def validate_tags(self, tags): + missing_tags = ( + filter_schedule_tags(tags) - self.schedule.schedule.keys() + ) + if missing_tags: + self.log.error( + "unknown-tags", + _fmt_msg="The following tags are missing from the schedule: " + "{unknown_tags}\n" + "Check the config file, add the `manual:` prefix or disable " + "tag validation (-f)", + unknown_tags=", ".join(missing_tags), + ) + raise RuntimeError("Unknown tags") + + def prevent_remote_rev(self, revs: Optional[List[Revision]] = None): + revs = revs if revs is not None else self.history + remote = [r for r in revs if r.server] + if remote: + self.log.error( + "remote-revs-disallowed", + _fmt_msg="Can not modify trust state of remote revisions " + "locally.\n" + "Either include a filter to exclude them (local)\n" + "or edit them on the origin server and pull the changes " + "(backy pull)", + revisions=",".join(r.uuid for r in remote), + ) + raise RuntimeError("Remote revs disallowed") + + ################# + # Making backups + + @locked(target=".backup", mode="exclusive") + def _clean(self) -> None: + """Clean-up incomplete revisions.""" + for revision in self.local_history: + if "duration" not in revision.stats: + self.log.warning( + "clean-incomplete", revision_uuid=revision.uuid + ) + revision.remove() + + @locked(target=".backup", mode="exclusive") + def rm(self, revs: Iterable[Revision]) -> None: + for r in revs: + r.remove() + + @locked(target=".backup", mode="exclusive") + def expire(self): + self.schedule.expire(self) + + @locked(target=".backup", mode="exclusive") + def tags( + self, + 
action: Literal["set", "add", "remove"], + revision: str, + tags: set[str], + expect: Optional[set[str]] = None, + autoremove: bool = False, + force=False, + ) -> bool: + self.scan() + revs = self.find_revisions(revision) + if not force and action != "remove": + self.validate_tags(tags) + for r in revs: + if expect is not None and expect != r.tags: + self.log.error("tags-expectation-failed") + return False + for r in revs: + match action: + case "set": + r.tags = tags + case "add": + r.tags |= tags + case "remove": + r.tags -= tags + case _: + raise ValueError(f"invalid action '{action}'") + if not r.tags and autoremove: + r.remove() + else: + r.write_info() + return True + + @locked(target=".backup", mode="exclusive") + def distrust(self, revs: Iterable[Revision]) -> None: + for r in revs: + assert not r.server + r.distrust() + r.write_info() + + ###################### + # Looking up revisions + + def last_by_tag(self) -> dict[str, datetime.datetime]: + """Return a dictionary showing the last time each tag was + backed up. + + Tags that have never been backed up won't show up here. + + """ + last_times: dict[str, datetime.datetime] = {} + for revision in self.clean_history: + for tag in revision.tags: + last_times.setdefault(tag, min_date()) + last_times[tag] = max([last_times[tag], revision.timestamp]) + return last_times + + def find_revisions( + self, spec: str | List[str | Revision | List[Revision]] + ) -> List[Revision]: + """Get a sorted list of revisions, oldest first, that match the given + specification. + """ + + tokens: List[str | Revision | List[Revision]] + if isinstance(spec, str): + tokens = [ + t.strip() + for t in re.split(r"(\(|\)|,|&|\.\.)", spec) + if t.strip() + ] + else: + tokens = spec + if "(" in tokens and ")" in tokens: + i = list_rindex(tokens, "(") + j = tokens.index(")", i) + prev, middle, next = tokens[:i], tokens[i + 1 : j], tokens[j + 1 :] + + functions = { + "first": lambda x: x[0], + "last": lambda x: x[-1], + "not": lambda x: [r for r in self.history if r not in x], + "reverse": lambda x: list(reversed(x)), + } + if prev and isinstance(prev[-1], str) and prev[-1] in functions: + return self.find_revisions( + prev[:-1] + + [functions[prev[-1]](self.find_revisions(middle))] + + next + ) + return self.find_revisions( + prev + [self.find_revisions(middle)] + next + ) + elif "," in tokens: + i = tokens.index(",") + return unique( + self.find_revisions(tokens[:i]) + + self.find_revisions(tokens[i + 1 :]) + ) + elif "&" in tokens: + i = tokens.index("&") + return duplicates( + self.find_revisions(tokens[:i]), + self.find_revisions(tokens[i + 1 :]), + ) + elif ".." 
in tokens: + _a, _b = list_split(tokens, "..") + assert len(_a) <= 1 and len(_b) <= 1 + a = self.index_by_token(list_get(_a, 0, "first")) + b = self.index_by_token(list_get(_b, 0, "last")) + return self.history[ceil(min(a, b)) : floor(max(a, b)) + 1] + assert len(tokens) == 1 + token = tokens[0] + if isinstance(token, Revision): + return [token] + elif isinstance(token, list): + return token + if token.startswith("server:"): + server = token.removeprefix("server:") + return [r for r in self.history if server == r.server] + elif token.startswith("tag:"): + tag = token.removeprefix("tag:") + return [r for r in self.history if tag in r.tags] + elif token.startswith("trust:"): + trust = Trust(token.removeprefix("trust:").lower()) + return [r for r in self.history if trust == r.trust] + elif token == "all": + return self.history[:] + elif token == "clean": + return self.clean_history + elif token == "local": + return self.find_revisions("server:") + elif token == "remote": + return self.find_revisions("not(server:)") + else: + return [self.find(token)] + + def index_by_token(self, spec: str | Revision | List[Revision]) -> float: + assert not isinstance( + spec, list + ), "can only index a single revision specifier" + if isinstance(spec, str): + return self.index_by_date(spec) or self.history.index( + self.find(spec) + ) + else: + return self.history.index(spec) + + def index_by_date(self, spec: str) -> Optional[float]: + """Return index of revision matched by datetime. + Index may be fractional if there is no exact datetime match. + Index range: [-0.5, len+0.5] + """ + try: + date = datetime.datetime.fromisoformat(spec) + date = date.replace(tzinfo=date.tzinfo or tzlocal.get_localzone()) + L = list_get( + [i for i, r in enumerate(self.history) if r.timestamp <= date], + -1, + -1, + ) + r = list_get( + [i for i, r in enumerate(self.history) if r.timestamp >= date], + 0, + len(self.history), + ) + print(spec, L, r) + assert 0 <= r - L <= 1, ( + "can not index with date if multiple revision have the same " + "timestamp" + ) + return (L + r) / 2.0 + except ValueError: + return None + + def find_by_number(self, _spec: str) -> Revision: + """Returns revision by relative number. + + 0 is the newest, + 1 is the next older, + 2 is the even next older, + and so on ... + + Raises IndexError or ValueError if no revision is found. + """ + spec = int(_spec) + if spec < 0: + raise KeyError("Integer revisions must be positive") + return self.history[-spec - 1] + + def find_by_tag(self, spec: str) -> Revision: + """Returns the latest revision matching a given tag. + + Raises IndexError or ValueError if no revision is found. + """ + if spec in ["last", "latest"]: + return self.history[-1] + if spec == "first": + return self.history[0] + raise ValueError() + + def find_by_uuid(self, spec: str) -> Revision: + """Returns revision matched by UUID. + + Raises IndexError if no revision is found. + """ + try: + return self._by_uuid[spec] + except KeyError: + raise IndexError() + + def find_by_function(self, spec: str) -> Revision: + m = re.fullmatch(r"(\w+)\(.+\)", spec) + if m and m.group(1) in ["first", "last"]: + return self.find_revisions(m.group(0))[0] + raise ValueError() + + def find(self, spec: str) -> Revision: + """Flexible revision search. + + Locates a revision by relative number, by tag, or by uuid. 
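+        Examples: "0" selects the newest revision and "1" the one before
+        it, "last"/"latest" and "first" select the newest and oldest
+        revision respectively, and other specifiers are looked up as
+        revision UUIDs.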
+ + """ + spec = spec.strip() + if spec == "" or not self.history: + raise KeyError(spec) + + for find in ( + self.find_by_number, + self.find_by_uuid, + self.find_by_tag, + self.find_by_function, + ): + try: + return find(spec) + except (ValueError, IndexError): + pass + self.log.warning("find-rev-not-found", spec=spec) + raise KeyError(spec) diff --git a/src/backy/revision.py b/src/backy/revision.py index 9f67fbdf..3c49637e 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -1,19 +1,17 @@ import datetime from enum import Enum from pathlib import Path -from typing import IO, TYPE_CHECKING, Literal, Optional +from typing import TYPE_CHECKING, Optional import shortuuid import yaml from structlog.stdlib import BoundLogger from . import utils -from .backends import select_backend from .utils import SafeFile if TYPE_CHECKING: - from .backends import BackyBackend - from .backup import Backup + from .repository import Repository TAG_MANUAL_PREFIX = "manual:" @@ -34,25 +32,24 @@ def filter_manual_tags(tags): class Revision(object): - backup: "Backup" + repository: "Repository" uuid: str timestamp: datetime.datetime stats: dict tags: set[str] orig_tags: set[str] trust: Trust = Trust.TRUSTED - backend_type: Literal["cowfile", "chunked"] = "chunked" server: str = "" log: BoundLogger def __init__( self, - backup: "Backup", + repository: "Repository", log: BoundLogger, uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, ) -> None: - self.backup = backup + self.repository = repository self.uuid = uuid if uuid else shortuuid.uuid() self.timestamp = timestamp if timestamp else utils.now() self.stats = {"bytes_written": 0} @@ -63,23 +60,20 @@ def __init__( @classmethod def create( cls, - backup: "Backup", + repository: "Repository", tags: set[str], log: BoundLogger, *, uuid: Optional[str] = None, ) -> "Revision": - r = Revision(backup, log, uuid) + r = Revision(repository, log, uuid) r.tags = tags - r.backend_type = backup.default_backend_type return r - @property - def backend(self) -> "BackyBackend": - return select_backend(self.backend_type)(self, self.log) - @classmethod - def load(cls, file: Path, backup: "Backup", log: BoundLogger) -> "Revision": + def load( + cls, file: Path, backup: "Repository", log: BoundLogger + ) -> "Revision": with file.open(encoding="utf-8") as f: metadata = yaml.safe_load(f) r = cls.from_dict(metadata, backup, log) @@ -98,19 +92,13 @@ def from_dict(cls, metadata, backup, log): r.server = metadata.get("server", "") # Assume trusted by default to support migration r.trust = Trust(metadata.get("trust", Trust.TRUSTED.value)) - # If the metadata does not show the backend type, then it's cowfile. 
- r.backend_type = metadata.get("backend_type", "cowfile") return r - @property - def filename(self) -> Path: - """Full pathname of the image file.""" - return self.backup.path / self.uuid - @property def info_filename(self) -> Path: """Full pathname of the metadata file.""" - return self.filename.with_suffix(self.filename.suffix + ".rev") + p = self.repository.path / self.uuid + return p.with_suffix(p.suffix + ".rev") def materialize(self) -> None: self.write_info() @@ -126,7 +114,6 @@ def write_info(self) -> None: def to_dict(self) -> dict: return { "uuid": self.uuid, - "backend_type": self.backend_type, "timestamp": self.timestamp, "parent": getattr( self.get_parent(), "uuid", "" @@ -138,6 +125,7 @@ def to_dict(self) -> dict: "server": self.server, } + # TODO: disallow local modification @property def pending_changes(self): return self.server and self.tags != self.orig_tags @@ -159,32 +147,25 @@ def remove(self, force=False) -> None: self.tags = set() self.write_info() else: - for filename in self.filename.parent.glob(self.filename.name + "*"): - if filename.exists(): - self.log.debug("remove-start", filename=filename) - filename.unlink() - self.log.debug("remove-end", filename=filename) + if self.info_filename.exists(): + self.log.debug("remove-start", filename=str(self.info_filename)) + self.info_filename.unlink() + self.log.debug("remove-end", filename=str(self.info_filename)) - if self in self.backup.history: - self.backup.history.remove(self) - del self.backup._by_uuid[self.uuid] + if self in self.repository.history: + self.repository.history.remove(self) + del self.repository._by_uuid[self.uuid] def writable(self) -> None: - if self.filename.exists(): - self.filename.chmod(0o640) self.info_filename.chmod(0o640) def readonly(self) -> None: - if self.filename.exists(): - self.filename.chmod(0o440) self.info_filename.chmod(0o440) def get_parent(self, ignore_trust=False) -> Optional["Revision"]: """defaults to last rev if not in history""" prev = None - for r in self.backup.history: - if r.backend_type != self.backend_type: - continue + for r in self.repository.history: if not ignore_trust and r.trust == Trust.DISTRUSTED: continue if r.server != self.server: diff --git a/src/backy/s3/__init__.py b/src/backy/s3/__init__.py new file mode 100644 index 00000000..a144c2f2 --- /dev/null +++ b/src/backy/s3/__init__.py @@ -0,0 +1,54 @@ +# Placeholder for future S3 implementation +from argparse import _ActionsContainer +from dataclasses import dataclass +from typing import Any, Iterable + +from structlog.stdlib import BoundLogger + +from backy.repository import Repository +from backy.revision import Revision +from backy.source import RestoreArgs, RestoreArgsType, Source, SourceType + + +@dataclass(frozen=True) +class S3RestoreArgs(RestoreArgs): + def to_cmdargs(self) -> Iterable[str]: + return [] + + @classmethod + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: + pass + + @classmethod + def from_args(cls: type[RestoreArgsType], **kw: Any) -> RestoreArgsType: + return cls() + + +class S3Source(Source): + type_ = "s3" + restore_type = S3RestoreArgs + + @classmethod + def from_config( + cls: type[SourceType], + repository: "Repository", + config: dict[str, Any], + log: BoundLogger, + ) -> SourceType: + raise NotImplementedError() + + def backup(self, revision: "Revision") -> bool: + raise NotImplementedError() + + def restore(self, revision: "Revision", args: RestoreArgsType): + raise NotImplementedError() + + def verify(self, revision: "Revision"): + raise 
NotImplementedError() + + def gc(self) -> None: + raise NotImplementedError() + + +def main(): + raise NotImplementedError() diff --git a/src/backy/schedule.py b/src/backy/schedule.py index 244712b5..e140ab9d 100644 --- a/src/backy/schedule.py +++ b/src/backy/schedule.py @@ -1,10 +1,13 @@ import copy import datetime from datetime import timedelta -from typing import Dict +from typing import TYPE_CHECKING, Dict, Iterable, List, Set, Tuple import backy.utils -from backy.revision import filter_schedule_tags +from backy.revision import Revision, filter_schedule_tags + +if TYPE_CHECKING: + from backy.repository import Repository MINUTE = 60 HOUR = 60 * MINUTE @@ -57,21 +60,29 @@ def __init__(self): self.schedule = {} self.config = {} - def configure(self, config): + def configure(self, config: dict) -> None: self.config = config self.schedule = copy.deepcopy(config) for tag, spec in self.schedule.items(): self.schedule[tag]["interval"] = parse_duration(spec["interval"]) - def to_dict(self): + def to_dict(self) -> dict: return self.config - def next(self, relative, spread, archive): + @classmethod + def from_dict(cls, conf) -> "Schedule": + r = cls() + r.configure(conf) + return r + + def next( + self, relative: datetime.datetime, spread: int, repository: "Repository" + ) -> Tuple[datetime.datetime, Set[str]]: time, tags = ideal_time, ideal_tags = self._next_ideal(relative, spread) - missed_tags = self._missed(archive) + missed_tags = self._missed(repository) # The next run will include all missed tags tags.update(missed_tags) - if missed_tags and len(archive.history): + if missed_tags and len(repository.history): # Perform an immediate backup if we have any history at all. # and when we aren't running a regular backup within the next # 5 minutes anyway. @@ -81,7 +92,9 @@ def next(self, relative, spread, archive): tags = missed_tags return time, tags - def _next_ideal(self, relative, spread): + def _next_ideal( + self, relative: datetime.datetime, spread: int + ) -> Tuple[datetime.datetime, Set[str]]: next_times: Dict[datetime.datetime, set] = {} for tag, settings in self.schedule.items(): t = next_times.setdefault( @@ -92,11 +105,11 @@ def _next_ideal(self, relative, spread): next_tags = next_times[next_time] return next_time, next_tags - def _missed(self, archive): + def _missed(self, repository: "Repository") -> Set[str]: # Check whether we missed any now = backy.utils.now() missing_tags = set(self.schedule.keys()) - for tag, last in archive.last_by_tag().items(): + for tag, last in repository.last_by_tag().items(): if tag not in self.schedule: # Ignore ad-hoc tags for catching up. continue @@ -105,19 +118,19 @@ def _missed(self, archive): missing_tags.remove(tag) return missing_tags - def expire(self, backup): + def expire(self, repository: "Repository") -> List["Revision"]: """Remove old revisions according to the backup schedule. Returns list of removed revisions. """ - backup.scan() + repository.scan() removed = [] # Clean out old backups: keep at least a certain number of copies # (keep) and ensure that we don't throw away copies that are newer # than keep * interval for this tag. 
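        # Illustrative example (assumed schedule, not normative): with
        #   daily: {interval: "1d", keep: 7}
        # phase 1 keeps at least the seven newest "daily" revisions and only
        # strips the "daily" tag from revisions that are also older than
        # roughly 7 * 1d. Phase 2 then drops tags belonging to schedules that
        # no longer exist, and phase 3 deletes revisions left without any tags.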
# Phase 1: remove tags that are expired for tag, args in self.schedule.items(): - revisions = backup.find_revisions("tag:" + tag) + revisions = repository.find_revisions("tag:" + tag) keep = args["keep"] if len(revisions) < keep: continue @@ -129,7 +142,7 @@ def expire(self, backup): old_revision.write_info() # Phase 2: remove all tags which have been created by a former schedule - for revision in backup.history: + for revision in repository.history: expired_tags = ( filter_schedule_tags(revision.tags) - self.schedule.keys() ) @@ -140,7 +153,7 @@ def expire(self, backup): # Phase 3: delete revisions that have no tags any more. # We are deleting items of the history while iterating over it. # Use a copy of the list! - for revision in list(backup.history): + for revision in list(repository.history): if revision.tags: continue removed.append(revision) @@ -148,7 +161,7 @@ def expire(self, backup): return removed - def sorted_tags(self, tags): + def sorted_tags(self, tags: Iterable[str]) -> Iterable[str]: """Return a list of tags, sorted by their interval. Smallest first.""" t = {} for tag in tags: diff --git a/src/backy/source.py b/src/backy/source.py new file mode 100644 index 00000000..07fd9b8b --- /dev/null +++ b/src/backy/source.py @@ -0,0 +1,403 @@ +import argparse +import asyncio +import errno +import filecmp +import subprocess +from abc import ABC, abstractmethod +from argparse import ArgumentParser, _ActionsContainer +from dataclasses import dataclass +from importlib.metadata import entry_points +from pathlib import Path +from typing import Any, Generic, Iterable, Optional, TypeVar, cast + +import structlog +import yaml +from structlog.stdlib import BoundLogger + +from backy import logging +from backy.repository import Repository +from backy.revision import Revision +from backy.schedule import Schedule +from backy.utils import SafeFile, generate_taskid + +SOURCE_PLUGINS = entry_points(group="backy.sources") + + +def factory_by_type(type_) -> type["Source"]: + return SOURCE_PLUGINS[type_].load() + + +RestoreArgsType = TypeVar("RestoreArgsType", bound="RestoreArgs") + +SourceType = TypeVar("SourceType", bound="Source") + + +@dataclass(frozen=True) +class RestoreArgs(ABC): + @abstractmethod + def to_cmdargs(self) -> Iterable[str]: + ... + + @classmethod + @abstractmethod + def setup_argparse(cls, restore_parser: _ActionsContainer) -> None: + ... + + @classmethod + @abstractmethod + def from_args(cls: type[RestoreArgsType], **kw: Any) -> RestoreArgsType: + ... + + +class Source(ABC, Generic[RestoreArgsType]): + """A source provides specific implementations for making and restoring + backups. + + There are three major aspects provided by a source implementation: + + 1. Extracting data from another system (e.g. Ceph RBD or S3). + + 2. Storing that data in the repository directory. + + 3. Restoring data, typically providing different workflows: + + - full restore into the original system (e.g. into an RBD image) + - full restore into another system (e.g. into a local image file) + - partial restore (e.g. 
allowing interactive access to a loop mounted version of the image) + + Additionally a few house keeping tasks need to be implemented: + + - garbage collection, to remove data that isn't needed after revisions + have expired + + - verification of stored data to protect against low level corruption + + + Implementations can be split into two parts: + + - a light shim as a Python class that can interact with the + rest of the backy code within Python + + - a subprocess that backy interacts with to trigger the actual work. + + """ + + type_: str + restore_type: type[RestoreArgsType] + repository: "Repository" + + def __init__(self, repository: "Repository"): + self.repository = repository + + @classmethod + @abstractmethod + def from_config( + cls: type[SourceType], + repository: "Repository", + config: dict[str, Any], + log: BoundLogger, + ) -> SourceType: + ... + + # @abstractmethod + # def to_config(self) -> dict[str, Any]: + # ... + + @abstractmethod + def backup(self, revision: "Revision") -> bool: + ... + + @abstractmethod + def restore(self, revision: "Revision", args: RestoreArgsType): + ... + + @abstractmethod + def verify(self, revision: "Revision"): + ... + + @abstractmethod + def gc(self) -> None: + ... + + @classmethod + def create_argparse(cls) -> ArgumentParser: + parser = argparse.ArgumentParser( + prog=f"backy-{cls.type_}", + description=f"The {cls.type_} plugin for backy.\n" + "You should not call this directly. Use the backy command instead.", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose output" + ) + # parser.add_argument( + # "-c", + # "--config", + # type=Path, + # default="/etc/backy.conf", + # help="(default: %(default)s)", + # ) + parser.add_argument( + "-C", + dest="workdir", + default=".", + type=Path, + help=( + "Run as if backy was started in instead of the current " + "working directory." 
+ ), + ) + parser.add_argument( + "-t", + "--taskid", + default=generate_taskid(), + help="ID to include in log messages (default: 4 random base32 chars)", + ) + + subparsers = parser.add_subparsers() + + # BACKUP + p = subparsers.add_parser( + "backup", + help="Perform a backup", + ) + p.set_defaults(func="backup") + # TODO: decide if the rev should be created + p.add_argument("revision", help="Revision to create.") + + # RESTORE + p = subparsers.add_parser( + "restore", + help="Restore (a given revision) to a given target", + ) + p.add_argument("revision", help="Revision to restore.") + cls.restore_type.setup_argparse(p) + p.set_defaults(func="restore") + + # GC + p = subparsers.add_parser( + "gc", + help="Remove unused data from the repository.", + ) + p.set_defaults(func="gc") + + # VERIFY + p = subparsers.add_parser( + "verify", + help="Verify specified revision", + ) + p.add_argument("revision", help="Revision to work on.") + p.set_defaults(func="verify") + + return parser + + @classmethod + def main(cls, *str_args: str) -> int: + parser = cls.create_argparse() + + args = parser.parse_args(str_args[1:]) + + if not hasattr(args, "func"): + parser.print_usage() + return 0 + + # Logging + logging.init_logging( + args.verbose, + args.workdir / "backy.log", + defaults={"taskid": args.taskid}, + ) + log = structlog.stdlib.get_logger(subsystem="command") + log.debug("invoked", args=" ".join(str_args)) + + try: + source = CmdLineSource.load(args.workdir, log).create_source(cls) + + ret = 0 + match args.func: + case "backup": + rev = source.repository.find_by_uuid(args.revision) + success = source.backup(rev) + ret = int(not success) + case "restore": + rev = source.repository.find_by_uuid(args.revision) + source.restore( + rev, + cls.restore_type.from_args(**dict(args._get_kwargs())), + ) + case "gc": + source.gc() + case "verify": + rev = source.repository.find_by_uuid(args.revision) + source.verify(rev) + case _: + raise ValueError("invalid function: " + args.fun) + log.debug("return-code", code=ret) + return ret + except Exception as e: + if isinstance(e, IOError) and e.errno in [ + errno.EDEADLK, + errno.EAGAIN, + ]: + log.warning("repo-currently-locked") + else: + log.exception("failed") + return 1 + + +class CmdLineSource: + repository: "Repository" + source_conf: dict[str, Any] + log: BoundLogger + + @property + def type_(self): + return self.source_conf["type"] + + @property + def restore_type(self): + return factory_by_type(self.type_).restore_type + + @property + def subcommand(self) -> str: + return "backy-" + self.type_ + + @property + def taskid(self): + return self.log._context.get( + "subtaskid", self.log._context.get("taskid", generate_taskid()) + ) + + def __init__( + self, + repository: "Repository", + source_conf: dict[str, Any], + log: BoundLogger, + ): + self.repository = repository + self.source_conf = source_conf + self.log = log.bind(subsystem="cmdlinesource") + + @classmethod + def from_config( + cls, config: dict[str, Any], log: BoundLogger + ) -> "CmdLineSource": + schedule = Schedule() + schedule.configure(config["schedule"]) + repo = Repository(Path(config["path"]), schedule, log) + repo.connect() + return cls(repo, config["source"], log) + + @classmethod + def load(cls, path: Path, log: BoundLogger) -> "CmdLineSource": + path = path / "config" + try: + with path.open(encoding="utf-8") as f: + config = yaml.safe_load(f) + return cls.from_config(config, log) + except IOError: + log.error( + "source-config-error", + _fmt_msg="Could not read source config file. 
Is the path correct?", + config_path=str(path), + ) + raise + + def to_config(self) -> dict[str, Any]: + return { + "path": str(self.repository.path), + "source": self.source_conf, + "schedule": self.repository.schedule.config, + } + + def store(self) -> None: + """Writes config file for 'backy-' subprocess.""" + + # We do not want to create leading directories, only + # the backup directory itself. If the base directory + # does not exist then we likely don't have a correctly + # configured environment. + self.repository.path.mkdir(exist_ok=True) + config = self.repository.path / "config" + with SafeFile(config, encoding="utf-8") as f: + f.open_new("wb") + yaml.safe_dump(self.to_config(), f) + if config.exists() and filecmp.cmp(config, f.name): + raise ValueError("not changed") + + def create_source( + self, sourcetype: Optional[type[SourceType]] = None + ) -> SourceType: + if sourcetype: + sourcetype_ = sourcetype + else: + try: + sourcetype_ = cast( + type[SourceType], factory_by_type(self.type_) + ) + except KeyError: + self.log.error( + "unknown-source-type", + _fmt_msg="Unknown source type '{type}'.", + type=self.type_, + ) + raise + + return sourcetype_.from_config( + self.repository, self.source_conf, self.log + ) + + def run(self, *args): + return self.invoke( + self.subcommand, + "-t", + self.taskid, + "-C", + str(self.repository.path), + *args, + ) + + def invoke(self, *args): + self.log.info("run", cmd=" ".join(args)) + proc = subprocess.run(args) + self.log.debug("run-finished", return_code=proc.returncode) + return proc.returncode + + def backup(self, revision: "Revision"): + return self.run("backup", revision.uuid) + + def restore(self, revision: "Revision", args: RestoreArgsType): + return self.run("restore", revision.uuid, *args.to_cmdargs()) + + def verify(self, revision: "Revision"): + return self.run("verify", revision.uuid) + + def gc(self): + return self.run("gc") + + +class AsyncCmdLineSource(CmdLineSource): + async def invoke(self, *args): + self.log.info("run", cmd=" ".join(args)) + proc = await asyncio.create_subprocess_exec( + *args, + start_new_session=True, # Avoid signal propagation like Ctrl-C. + close_fds=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + try: + return_code = await proc.wait() + self.log.debug( + "run-finished", + return_code=return_code, + subprocess_pid=proc.pid, + ) + return return_code + except asyncio.CancelledError: + self.log.warning("run-cancelled", subprocess_pid=proc.pid) + try: + proc.terminate() + except ProcessLookupError: + pass + raise diff --git a/src/backy/sources/__init__.py b/src/backy/sources/__init__.py deleted file mode 100644 index 23e63a3d..00000000 --- a/src/backy/sources/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -from abc import ABC, abstractmethod -from importlib.metadata import entry_points -from typing import Type - -from structlog.stdlib import BoundLogger - -import backy.revision - - -class BackySource(ABC): - @abstractmethod - def backup(self, target: "backy.backends.BackyBackend") -> None: - ... - - @abstractmethod - def verify(self, target: "backy.backends.BackyBackend") -> bool: - ... - - -class BackySourceContext(ABC): - @abstractmethod - def __enter__(self) -> BackySource: - ... - - def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): - pass - - -class BackySourceFactory(ABC): - @abstractmethod - def __init__(self, config: dict, log: BoundLogger) -> None: - ... 
- - @abstractmethod - def __call__( - self, revision: "backy.revision.Revision" - ) -> BackySourceContext: - ... - - @abstractmethod - def ready(self) -> bool: - """Check whether the source can be backed up.""" - ... - - -def select_source(type_: str) -> Type[BackySourceFactory]: - match type_: - case "flyingcircus": - from backy.sources.flyingcircus.source import FlyingCircusRootDisk - - return FlyingCircusRootDisk - case "ceph-rbd": - from backy.sources.ceph.source import CephRBD - - return CephRBD - case "file": - from backy.sources.file import File - - return File - case _: - raise ValueError(f"invalid backend: {type_}") diff --git a/src/backy/sources/ceph/__init__.py b/src/backy/sources/ceph/__init__.py deleted file mode 100644 index 623db0e2..00000000 --- a/src/backy/sources/ceph/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from subprocess import PIPE, run - - -def detect_whole_object_support(): - result = run( - ["rbd", "help", "export-diff"], stdout=PIPE, stderr=PIPE, check=True - ) - return "--whole-object" in result.stdout.decode("ascii") - - -try: - CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = detect_whole_object_support() -except Exception: - CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = False diff --git a/src/backy/sources/ceph/diff.py b/src/backy/sources/ceph/diff.py deleted file mode 100644 index b7c712ac..00000000 --- a/src/backy/sources/ceph/diff.py +++ /dev/null @@ -1,157 +0,0 @@ -import struct -from collections import namedtuple -from typing import IO, Optional - -from backy.fallocate import punch_hole - - -def unpack_from(fmt, f): - size = struct.calcsize(fmt) - b = f.read(size) - return struct.unpack(fmt, b) - - -Zero = namedtuple("Zero", ["start", "length"]) -Data = namedtuple("Data", ["start", "length", "stream"]) -SnapSize = namedtuple("SnapSize", ["size"]) -FromSnap = namedtuple("FromSnap", ["snapshot"]) -ToSnap = namedtuple("ToSnap", ["snapshot"]) - - -class RBDDiffV1(object): - f: IO - phase: str # header, metadata, data - record_type: Optional[str] - _streaming: bool - - header = b"rbd diff v1\n" - - def __init__(self, fh): - # self.filename = filename - self.f = fh - - self.phase = "header" - self.read_header() - self.record_type = None - self._streaming = False - - def read_header(self): - assert self.phase == "header" - header = self.f.read(len(self.header)) - if header != self.header: - raise ValueError("Unexpected header: {0!r}".format(header)) - self.phase = "metadata" - - def read_record(self): - if self.phase == "end": - return - assert not self._streaming, "Unread data from read_w. Consume first." - last_record_type = self.record_type - self.record_type = self.f.read(1).decode("ascii") - if self.record_type not in ["f", "t", "s", "w", "z", "e"]: - raise ValueError( - 'Got invalid record type "{}". Previous record: {}'.format( - self.record_type, last_record_type - ) - ) - method = getattr(self, "read_{}".format(self.record_type)) - return method() - - def read_fbytes(self, encoding=None): - length = unpack_from(" str: - # This wrapper function for the `rbd` command is only used for - # getting and interpreting text messages, making this the correct level for - # managing text encoding. - # Other use cases where binary data is piped to rbd have their own dedicated - # wrappers. 
- return subprocess.check_output( - cmdline, encoding=encoding, errors="replace" - ) - - def _rbd(self, cmd, format=None): - cmd = filter(None, cmd) - rbd = [RBD] - - rbd.extend(cmd) - - if format == "json": - rbd.append("--format=json") - - self.log.debug("executing-command", command=" ".join(rbd)) - result = self._ceph_cli(rbd) - - self.log.debug("executed-command", stdout=result) - if format == "json": - result = json.loads(result) - - return result - - def exists(self, snapspec: str): - try: - return self._rbd(["info", snapspec], format="json") - except subprocess.CalledProcessError as e: - if e.returncode == 2: - return False - raise - - def map(self, image: str, readonly=False): - def parse_mappings_pre_nautilus(mappings): - """The parser code for Ceph release Luminous and earlier.""" - for mapping in mappings.values(): - if image == "{pool}/{name}@{snap}".format(**mapping): - return mapping - raise RuntimeError("Map not found in mapping list.") - - def parse_mappings_since_nautilus(mappings): - """The parser code for Ceph release Nautilus and later.""" - for mapping in mappings: - if image == "{pool}/{name}@{snap}".format(**mapping): - return mapping - raise RuntimeError("Map not found in mapping list.") - - versionstring = self._rbd(["--version"]) - - self._rbd(["map", image, "--read-only" if readonly else ""]) - - mappings_raw = self._rbd(["showmapped"], format="json") - - if "nautilus" in versionstring: - mapping = parse_mappings_since_nautilus(mappings_raw) - elif "luminous" in versionstring: - mapping = parse_mappings_pre_nautilus(mappings_raw) - else: - # our jewel build provides no version info - # this will break with releases newer than nautilus - mapping = parse_mappings_pre_nautilus(mappings_raw) - - def scrub_mapping(mapping): - SPEC = {"pool", "name", "snap", "device"} - # Ensure all specced keys exist - for key in SPEC: - if key not in mapping: - raise KeyError( - f"Missing key `{key}` in mapping {mapping!r}" - ) - # Scrub all non-specced keys - for key in list(mapping): - if key not in SPEC: - del mapping[key] - return mapping - - return scrub_mapping(mapping) - - def unmap(self, device): - self._rbd(["unmap", device]) - - def snap_create(self, image): - self._rbd(["snap", "create", image]) - - def snap_ls(self, image): - return self._rbd(["snap", "ls", image], format="json") - - def snap_rm(self, image): - return self._rbd(["snap", "rm", image]) - - @contextlib.contextmanager - def export_diff(self, new: str, old: str) -> Iterator[RBDDiffV1]: - self.log.info("export-diff") - if backy.sources.ceph.CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF: - EXPORT_WHOLE_OBJECT = ["--whole-object"] - else: - EXPORT_WHOLE_OBJECT = [] - proc = subprocess.Popen( - [RBD, "export-diff", new, "--from-snap", old] - + EXPORT_WHOLE_OBJECT - + ["-"], - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - # Have a rather largish buffer size, so rbd has some room to - # push its data to, when we are busy writing. 
- bufsize=8 * CHUNK_SIZE, - ) - assert proc.stdout is not None - try: - yield RBDDiffV1(proc.stdout) - finally: - proc.stdout.close() - proc.wait() - - @contextlib.contextmanager - def image_reader(self, image: str) -> Iterator[BinaryIO]: - mapped = self.map(image, readonly=True) - source = open(mapped["device"], "rb", buffering=CHUNK_SIZE) - try: - yield source - finally: - source.close() - self.unmap(mapped["device"]) - - @contextlib.contextmanager - def export(self, image: str) -> Iterator[IO]: - self.log.info("export") - proc = subprocess.Popen( - [RBD, "export", image, "-"], - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - # Have a rather largish buffer size, so rbd has some room to - # push its data to, when we are busy writing. - bufsize=4 * CHUNK_SIZE, - ) - assert proc.stdout is not None - try: - yield proc.stdout - finally: - proc.stdout.close() - proc.wait() diff --git a/src/backy/sources/ceph/source.py b/src/backy/sources/ceph/source.py deleted file mode 100644 index 8d6623fc..00000000 --- a/src/backy/sources/ceph/source.py +++ /dev/null @@ -1,173 +0,0 @@ -import time - -from structlog.stdlib import BoundLogger - -import backy.backends -import backy.utils -from backy.revision import Revision, Trust - -from ...backends import BackyBackend -from ...quarantine import QuarantineReport -from .. import BackySource, BackySourceContext, BackySourceFactory -from .rbd import RBDClient - - -class CephRBD(BackySource, BackySourceFactory, BackySourceContext): - """The Ceph RBD source. - - Manages snapshots corresponding to revisions and provides a verification - that tries to balance reliability and performance. - """ - - pool: str - image: str - always_full: bool - log: BoundLogger - rbd: RBDClient - - def __init__(self, config: dict, log: BoundLogger): - self.pool = config["pool"] - self.image = config["image"] - self.always_full = config.get("full-always", False) - self.log = log.bind(subsystem="ceph") - self.rbd = RBDClient(self.log) - - def ready(self) -> bool: - """Check whether the source can be backed up. - - For RBD sources this means the volume exists and is accessible. - - """ - try: - if self.rbd.exists(self._image_name): - return True - except Exception: - self.log.exception("not-ready") - return False - - def __call__(self, revision): - self.revision = revision - return self - - def __enter__(self): - snapname = "backy-{}".format(self.revision.uuid) - self.create_snapshot(snapname) - return self - - def create_snapshot(self, snapname: str) -> None: - """An overridable method to allow different ways of creating the - snapshot. - """ - self.rbd.snap_create(self._image_name + "@" + snapname) - - @property - def _image_name(self) -> str: - return "{}/{}".format(self.pool, self.image) - - def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): - self._delete_old_snapshots() - - def backup(self, target: BackyBackend) -> None: - if self.always_full: - self.log.info("backup-always-full") - self.full(target) - return - revision = self.revision - while True: - parent = revision.get_parent() - if not parent: - self.log.info("backup-no-valid-parent") - self.full(target) - return - if not self.rbd.exists(self._image_name + "@backy-" + parent.uuid): - self.log.info( - "ignoring-rev-without-snapshot", - revision_uuid=parent.uuid, - ) - revision = parent - continue - # Ok, it's trusted and we have a snapshot. Let's do a diff. 
- break - self.diff(target, parent) - - def diff(self, target: BackyBackend, parent: Revision) -> None: - self.log.info("diff") - snap_from = "backy-" + parent.uuid - snap_to = "backy-" + self.revision.uuid - s = self.rbd.export_diff(self._image_name + "@" + snap_to, snap_from) - with s as source, target.open("r+b", parent) as target_: - bytes = source.integrate(target_, snap_from, snap_to) - self.log.info("diff-integration-finished") - - self.revision.stats["bytes_written"] = bytes - - # TMP Gather statistics to see where to optimize - from backy.backends.chunked.chunk import chunk_stats - - self.revision.stats["chunk_stats"] = chunk_stats - - def full(self, target: BackyBackend) -> None: - self.log.info("full") - s = self.rbd.export( - "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) - ) - copied = 0 - with s as source, target.open("r+b") as target_: - while True: - buf = source.read(4 * backy.utils.MiB) - if not buf: - break - target_.write(buf) - copied += len(buf) - self.revision.stats["bytes_written"] = copied - - # TMP Gather statistics to see if we actually are aligned. - from backy.backends.chunked.chunk import chunk_stats - - self.revision.stats["chunk_stats"] = chunk_stats - - def verify(self, target: BackyBackend) -> bool: - s = self.rbd.image_reader( - "{}/{}@backy-{}".format(self.pool, self.image, self.revision.uuid) - ) - self.revision.stats["ceph-verification"] = "partial" - - with s as source, target.open("rb") as target_: - self.log.info("verify") - return backy.utils.files_are_roughly_equal( - source, - target_, - report=lambda s, t, o: self.revision.backup.quarantine.add_report( - QuarantineReport(s, t, o) - ), - ) - - def _delete_old_snapshots(self) -> None: - # Clean up all snapshots except the one for the most recent valid - # revision. - # Previously we used to remove all snapshots but the one for this - # revision - which is wrong: broken new revisions would always cause - # full backups instead of new deltas based on the most recent valid - # one. 
- # XXX this will break if multiple servers are active - if not self.always_full and self.revision.backup.local_history: - keep_snapshot_revision = self.revision.backup.local_history[-1].uuid - else: - keep_snapshot_revision = None - for snapshot in self.rbd.snap_ls(self._image_name): - if not snapshot["name"].startswith("backy-"): - # Do not touch non-backy snapshots - continue - uuid = snapshot["name"].replace("backy-", "") - if uuid != keep_snapshot_revision: - time.sleep(3) # avoid race condition while unmapping - self.log.info( - "delete-old-snapshot", snapshot_name=snapshot["name"] - ) - try: - self.rbd.snap_rm(self._image_name + "@" + snapshot["name"]) - except Exception: - self.log.exception( - "delete-old-snapshot-failed", - snapshot_name=snapshot["name"], - ) diff --git a/src/backy/sources/ceph/tests/test_ceph_source.py b/src/backy/sources/ceph/tests/test_ceph_source.py deleted file mode 100644 index 71c34923..00000000 --- a/src/backy/sources/ceph/tests/test_ceph_source.py +++ /dev/null @@ -1,339 +0,0 @@ -import datetime -import io -import os.path as p -import subprocess -import time -from unittest import mock - -import pytest - -import backy.utils -from backy.backends.chunked import ChunkedFileBackend -from backy.backends.cowfile import COWFileBackend -from backy.revision import Revision -from backy.sources import select_source -from backy.sources.ceph.source import CephRBD - -BLOCK = backy.utils.PUNCH_SIZE - -with open(p.join(p.dirname(__file__), "nodata.rbddiff"), "rb") as f: - SAMPLE_RBDDIFF = f.read() - - -@pytest.fixture -def check_output(monkeypatch): - check_output = mock.Mock() - check_output.return_value = b"{}" - monkeypatch.setattr(subprocess, "check_output", check_output) - return check_output - - -@pytest.fixture -def ceph_rbd_imagesource(rbdclient, nosleep, log): - """Provides a CephRBD object configured for image pool/test, with rbd - being mocked away and allowing snapshots on that image.""" - source = CephRBD(dict(pool="test", image="foo"), log) - # rbdclient mock setup: - rbdclient._ceph_cli._register_image_for_snaps("test/foo") - source.rbd = rbdclient - return source - - -@pytest.fixture -def nosleep(monkeypatch): - monkeypatch.setattr(time, "sleep", lambda x: None) - - -def test_select_ceph_source(): - assert select_source("ceph-rbd") == CephRBD - - -def test_assign_revision(nosleep, log): - source = CephRBD(dict(pool="test", image="foo"), log) - revision = mock.Mock() - context_manager = source(revision) - assert context_manager.revision is revision - - -def test_context_manager(backup, ceph_rbd_imagesource, log): - """The imagesource context manager around a backup revision must create a - corresponding snapshot at enter, and clean up at exit.""" - source = ceph_rbd_imagesource - - revision = Revision.create(backup, set(), log, uuid="1") - with source(revision): - assert source.rbd.snap_ls("test/foo")[0]["name"] == "backy-1" - - assert len(source.rbd.snap_ls("test/foo")) == 0 - - -def test_context_manager_cleans_out_snapshots( - ceph_rbd_imagesource, backup, log -): - """The imagesource context manager cleans up unexpected backy snapshot revisions. 
- Snapshots without the prefix 'backy-' are left untouched.""" - source = ceph_rbd_imagesource - - # snaps without backy- prefix are left untouched - source.rbd.snap_create("test/foo@someother") - # unexpected revision snapshots are cleaned - source.rbd.snap_create("test/foo@backy-2") - - revision = Revision.create(backup, set(), log, uuid="1") - with source(revision): - revision.materialize() - backup.scan() - - assert source.rbd.snap_ls("test/foo") == [ - { - "id": 86925, - "name": "someother", - "protected": "false", - "size": 32212254720, - "timestamp": "Sun Feb 12 18:35:18 2023", - }, - { - "id": 86925, - "name": "backy-1", - "protected": "false", - "size": 32212254720, - "timestamp": "Sun Feb 12 18:35:18 2023", - }, - ] - - -def test_choose_full_without_parent(ceph_rbd_imagesource, backup, log): - """When backing up a revision without a parent, a full backup needs to happen. - The diff function must not be called.""" - source = ceph_rbd_imagesource - - source.diff = mock.Mock() - source.full = mock.Mock() - - revision = Revision.create(backup, set(), log) - - with source(revision) as s: - s.backup(revision.backend) - - assert not source.diff.called - assert source.full.called - - -def test_choose_full_without_snapshot(ceph_rbd_imagesource, backup, log): - """When backing up a revision with an immediate parent that has no corresponding - snapshot, that parent must be ignored and a full backup has to be made. - The diff function must not be called.""" - source = ceph_rbd_imagesource - - source.diff = mock.Mock() - source.full = mock.Mock() - - revision1 = Revision.create(backup, set(), log) - revision1.materialize() - - backup.scan() - - revision2 = Revision.create(backup, set(), log) - - with source(revision2): - source.backup(revision2.backend) - - assert not source.diff.called - assert source.full.called - - -def test_choose_diff_with_snapshot(ceph_rbd_imagesource, backup, log): - """In an environment where a parent revision exists and has a snapshot, both - revisions shall be diffed.""" - source = ceph_rbd_imagesource - - source.diff = mock.Mock() - source.full = mock.Mock() - - revision1 = Revision.create(backup, set(), log, uuid="a1") - revision1.materialize() - - # part of test setup: we check backy's behavior when a previous version not only - # exists, but also has a snapshot - source.rbd.snap_create("test/foo@backy-a1") - - backup.scan() - - revision2 = Revision.create(backup, set(), log) - - with source(revision2): - source.backup(revision2.backend) - - assert source.diff.called - assert not source.full.called - - -def test_diff_backup(ceph_rbd_imagesource, backup, tmp_path, log): - """When doing a diff backup between two revisions with snapshot, the RBDDiff needs - to be called properly, a snapshot for the new revision needs to be created and the - snapshot of the previous revision needs to be removed after the successfull backup.""" - from backy.sources.ceph.diff import RBDDiffV1 - - source = ceph_rbd_imagesource - - parent = Revision.create( - backup, set(), log, uuid="ed968696-5ab0-4fe0-af1c-14cadab44661" - ) - parent.materialize() - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. 
- revision = Revision.create( - backup, set(), log, uuid="f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" - ) - revision.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - - with parent.backend.open("wb") as f: - f.write(b"asdf") - - backup.scan() - revision.materialize() - - # test setup: ensure that previous revision has a snapshot. It needs to be removed - # by the backup process - source.rbd.snap_create( - "test/foo@backy-ed968696-5ab0-4fe0-af1c-14cadab44661" - ) - - with mock.patch("backy.sources.ceph.rbd.RBDClient.export_diff") as export: - export.return_value = mock.MagicMock() - export.return_value.__enter__.return_value = RBDDiffV1( - io.BytesIO(SAMPLE_RBDDIFF) - ) - with source(revision): - source.diff(revision.backend, revision.get_parent()) - backup.history.append(revision) - export.assert_called_with( - "test/foo@backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802", - "backy-ed968696-5ab0-4fe0-af1c-14cadab44661", - ) - - current_snaps = source.rbd.snap_ls("test/foo") - assert len(current_snaps) == 1 - assert ( - current_snaps[0]["name"] == "backy-f0e7292e-4ad8-4f2e-86d6-f40dca2aa802" - ) - - -def test_full_backup(ceph_rbd_imagesource, backup, tmp_path, log): - source = ceph_rbd_imagesource - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(backup, set(), log, uuid="a0") - revision.materialize() - backup.scan() - - with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: - export.return_value = io.BytesIO(b"Han likes Leia.") - backend = revision.backend - with source(revision): - source.full(backend) - export.assert_called_with("test/foo@backy-a0") - - # the corresponding snapshot for revision a0 is created by the backup process - assert source.rbd.snap_ls("test/foo")[0]["name"] == "backy-a0" - - with backend.open("rb") as f: - assert f.read() == b"Han likes Leia." - - # Now make another full backup. This overwrites the first. - revision2 = Revision.create(backup, set(), log, uuid="a1") - revision2.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - revision2.materialize() - backup.scan() - - with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: - export.return_value = io.BytesIO(b"Han loves Leia.") - backend = revision2.backend - with source(revision2): - source.full(backend) - - with backend.open("rb") as f: - assert f.read() == b"Han loves Leia." - - current_snaps = source.rbd.snap_ls("test/foo") - assert len(current_snaps) == 1 - assert current_snaps[0]["name"] == "backy-a1" - - -def test_full_backup_integrates_changes( - ceph_rbd_imagesource, backup, tmp_path, log -): - # The backup source changes between two consecutive full backups. Both - # backup images should reflect the state of the source at the time the - # backup was run. This test is here to detect regressions while optimizing - # the full backup algorithms (copying and applying deltas). 
- source = ceph_rbd_imagesource - content0 = BLOCK * b"A" + BLOCK * b"B" + BLOCK * b"C" + BLOCK * b"D" - content1 = BLOCK * b"A" + BLOCK * b"X" + BLOCK * b"\0" + BLOCK * b"D" - - rev0 = Revision.create(backup, set(), log) - rev0.materialize() - backup.scan() - - rev1 = Revision.create(backup, set(), log) - rev1.timestamp = backy.utils.now() + datetime.timedelta(seconds=1) - rev1.materialize() - - # check fidelity - for content, rev in [(content0, rev0), (content1, rev1)]: - with mock.patch("backy.sources.ceph.rbd.RBDClient.export") as export: - export.return_value = io.BytesIO(content) - with source(rev): - source.full(rev.backend) - export.assert_called_with("test/foo@backy-{}".format(rev.uuid)) - - with rev.backend.open("rb") as f: - assert content == f.read() - - -def test_verify_fail(backup, tmp_path, ceph_rbd_imagesource, log): - source = ceph_rbd_imagesource - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(backup, set(), log) - revision.materialize() - - backup.scan() - - rbd_source = str(tmp_path / "-dev-rbd0") - with open(rbd_source, "w") as f: - f.write("Han likes Leia.") - - backend = revision.backend - with backend.open("wb") as f: - f.write(b"foobar") - # The backend has false data, so this needs to be detected. - with source(revision): - assert not source.verify(backend) - assert len(backup.quarantine.report_ids) == 1 - - -def test_verify(ceph_rbd_imagesource, backup, tmp_path, log): - source = ceph_rbd_imagesource - - # Those revision numbers are taken from the sample snapshot and need - # to match, otherwise our diff integration will (correctly) complain. - revision = Revision.create(backup, set(), log, uuid="a0") - revision.materialize() - - backup.scan() - - rbd_source = source.rbd.map("test/foo@backy-a0")["device"] - with open(rbd_source, "wb") as f: - f.write(b"Han likes Leia.") - source.rbd.unmap(rbd_source) - - with revision.backend.open("wb") as f: - f.write(b"Han likes Leia.") - f.flush() - - with source(revision): - assert source.verify(revision.backend) diff --git a/src/backy/sources/file.py b/src/backy/sources/file.py deleted file mode 100644 index 2312943b..00000000 --- a/src/backy/sources/file.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Optional - -from structlog.stdlib import BoundLogger - -import backy.backends -from backy.quarantine import QuarantineReport -from backy.revision import Revision, Trust -from backy.sources import BackySource, BackySourceContext, BackySourceFactory -from backy.utils import copy, copy_overwrite, files_are_equal - - -class File(BackySource, BackySourceFactory, BackySourceContext): - filename: str - cow: bool - revision: Revision - log: BoundLogger - - def __init__(self, config: dict, log: BoundLogger): - self.filename = config["filename"] - self.cow = config.get("cow", True) - self.log = log.bind(filename=self.filename, subsystem="file") - - def __call__(self, revision: Revision): - self.revision = revision - self.log = self.log.bind(revision_uuid=revision.uuid) - return self - - def __enter__(self): - return self - - def ready(self) -> bool: - """Check whether the source can be backed up. - - For files this means the file exists and is readable. 
- - """ - try: - with open(self.filename, "rb"): - pass - except Exception: - return False - return True - - def backup(self, target: "backy.backends.BackyBackend") -> None: - self.log.debug("backup") - s = open(self.filename, "rb") - parent = self.revision.get_parent() - with s as source, target.open("r+b", parent) as target_: - if self.cow and parent: - self.log.info("backup-sparse") - bytes = copy_overwrite(source, target_) - else: - self.log.info("backup-full") - bytes = copy(source, target_) - - self.revision.stats["bytes_written"] = bytes - - def verify(self, target: "backy.backends.BackyBackend") -> bool: - self.log.info("verify") - s = open(self.filename, "rb") - with s as source, target.open("rb") as target_: - return files_are_equal( - source, - target_, - report=lambda s, t, o: self.revision.backup.quarantine.add_report( - QuarantineReport(s, t, o) - ), - ) diff --git a/src/backy/sources/flyingcircus/__init__.py b/src/backy/sources/flyingcircus/__init__.py deleted file mode 100644 index 91b1aa49..00000000 --- a/src/backy/sources/flyingcircus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Make this a package. diff --git a/src/backy/sources/flyingcircus/source.py b/src/backy/sources/flyingcircus/source.py deleted file mode 100644 index 92929ee3..00000000 --- a/src/backy/sources/flyingcircus/source.py +++ /dev/null @@ -1,79 +0,0 @@ -import json -import time -import uuid - -import consulate -from structlog.stdlib import BoundLogger - -from ...timeout import TimeOut, TimeOutError -from ..ceph.source import CephRBD - - -class FlyingCircusRootDisk(CephRBD): - snapshot_timeout = 90 - - def __init__(self, config, log: BoundLogger): - self.config = config - self.vm = config["vm"] - self.consul_acl_token = config.get("consul_acl_token") - super(FlyingCircusRootDisk, self).__init__(config, log) - self.log = self.log.bind(vm=self.vm, subsystem="fc-disk") - - def create_snapshot(self, name: str) -> None: - consul = consulate.Consul(token=self.consul_acl_token) - snapshot_key = "snapshot/{}".format(str(uuid.uuid4())) - self.log.info( - "creating-snapshot", - snapshot_name=name, - snapshot_key=snapshot_key, - ) - - consul.kv[snapshot_key] = {"vm": self.vm, "snapshot": name} - - time.sleep(3) - try: - timeout = TimeOut( - self.snapshot_timeout, interval=2, raise_on_timeout=True - ) - while timeout.tick(): - for snapshot in self.rbd.snap_ls(self._image_name): - if snapshot["name"] == name: - return - except TimeOutError: - # The VM might have been shut down. Try doing a regular Ceph - # snapshot locally. - super(FlyingCircusRootDisk, self).create_snapshot(name) - except KeyboardInterrupt: - raise - finally: - # In case the snapshot still gets created: the general snapshot - # deletion code in ceph/source will clean up unused backy snapshots - # anyway. However, we need to work a little harder to delete old - # snapshot requests, otherwise we've sometimes seen those not - # getting deleted and then re-created all the time. - for key in list(consul.kv.find("snapshot/")): - try: - s = consul.kv[key] - except KeyError: - continue - try: - s = json.loads(s) - except json.decoder.JSONDecodeError: - # Clean up garbage. - self.log.warning( - "create-snapshot-removing-garbage-request", - snapshot_key=key, - ) - del consul.kv[key] - if s["vm"] != self.vm: - continue - # The knowledge about the `backy-` prefix isn't properly - # encapsulated here. 
- if s["snapshot"].startswith("backy-"): - self.log.info( - "create-snapshot-removing-request", - vm=s["vm"], - snapshot_name=s["snapshot"], - snapshot_key=key, - ) - del consul.kv[key] diff --git a/src/backy/sources/flyingcircus/tests/test_source.py b/src/backy/sources/flyingcircus/tests/test_source.py deleted file mode 100644 index ada47038..00000000 --- a/src/backy/sources/flyingcircus/tests/test_source.py +++ /dev/null @@ -1,87 +0,0 @@ -import json -import subprocess -from unittest import mock - -import consulate -import pytest - -from backy.sources import select_source -from backy.sources.flyingcircus.source import FlyingCircusRootDisk - - -@pytest.fixture -def fcrd(log): - return FlyingCircusRootDisk( - { - "pool": "test", - "image": "test01.root", - "vm": "test01", - "consul_acl_token": "12345", - }, - log, - ) - - -def test_select_flyingcircus_source(): - assert select_source("flyingcircus") == FlyingCircusRootDisk - - -def test_flyingcircus_source(fcrd): - assert fcrd.pool == "test" - assert fcrd.image == "test01.root" - assert fcrd.vm == "test01" - assert fcrd.consul_acl_token == "12345" - - -@pytest.mark.slow -def test_flyingcircus_consul_interaction(monkeypatch, fcrd): - consul_class = mock.Mock() - consul = consul_class() - consul.kv = ConsulKVDict() - monkeypatch.setattr(consulate, "Consul", consul_class) - - check_output = mock.Mock() - check_output.side_effect = ["[]", '[{"name": "asdf"}]'] - monkeypatch.setattr(subprocess, "check_output", check_output) - fcrd.create_snapshot("asdf") - - -class ConsulKVDict(dict): - def __setitem__(self, k, v): - if not isinstance(v, bytes): - v = json.dumps(v) - super(ConsulKVDict, self).__setitem__(k, v) - - def find(self, prefix): - for key in self: - if key.startswith(prefix): - yield key - - -@pytest.mark.slow -def test_flyingcircus_consul_interaction_timeout(monkeypatch, fcrd): - consul_class = mock.Mock() - consul = consul_class() - consul.kv = ConsulKVDict() - monkeypatch.setattr(consulate, "Consul", consul_class) - - check_output = mock.Mock() - check_output.side_effect = [ - '[{"name": "bsdf"}]', - "[]", - "[]", - "[]", - "[]", - "[]", - ] - monkeypatch.setattr(subprocess, "check_output", check_output) - - fcrd.snapshot_timeout = 2 - fcrd.create_snapshot("asdf") - - assert check_output.call_args[0][0] == [ - "rbd", - "snap", - "create", - "test/test01.root@asdf", - ] diff --git a/src/backy/tests/conftest.py b/src/backy/tests/conftest.py deleted file mode 100644 index b797d016..00000000 --- a/src/backy/tests/conftest.py +++ /dev/null @@ -1,12 +0,0 @@ -from zoneinfo import ZoneInfo - -import pytest -import tzlocal - - -@pytest.fixture -def tz_berlin(monkeypatch): - """Fix time zone to gain independece from runtime environment.""" - monkeypatch.setattr( - tzlocal, "get_localzone", lambda: ZoneInfo("Europe/Berlin") - ) diff --git a/src/backy/tests/samples/simple_file/config b/src/backy/tests/samples/simple_file/config deleted file mode 100644 index 46d5cda4..00000000 --- a/src/backy/tests/samples/simple_file/config +++ /dev/null @@ -1,8 +0,0 @@ ---- -schedule: - daily: - interval: 1d - keep: 7 -source: - type: file - filename: input-file diff --git a/src/backy/tests/test_archive.py b/src/backy/tests/test_archive.py deleted file mode 100644 index 232d4e58..00000000 --- a/src/backy/tests/test_archive.py +++ /dev/null @@ -1,194 +0,0 @@ -import shutil - -import pytest - - -@pytest.fixture -def backup_with_revisions(backup, tmp_path): - with open(str(tmp_path / "123-0.rev"), "wb") as f: - f.write( - b"""\ -uuid: 123-0 -timestamp: 
2015-08-29 00:00:00+00:00 -parent: -trust: verified -stats: {bytes_written: 14868480, duration: 31.1} -tags: [daily, weekly, monthly] -""" - ) - with open(str(tmp_path / "123-1.rev"), "wb") as f: - f.write( - b"""\ -uuid: 123-1 -timestamp: 2015-08-30 01:00:00+00:00 -parent: 123-0 -stats: {bytes_written: 1486880, duration: 3.7} -server: remote1 -tags: [daily, weekly] -""" - ) - with open(str(tmp_path / "123-2.rev"), "wb") as f: - f.write( - b"""\ -uuid: 123-2 -timestamp: 2015-08-30 02:00:00+00:00 -parent: 123-1 -stats: {} -server: remote1 -tags: [daily] -""" - ) - backup.scan() - return backup - - -def test_empty_revisions(backup): - assert backup.history == [] - - -def test_find_revision_empty(backup): - with pytest.raises(KeyError): - backup.find("-1") - with pytest.raises(KeyError): - backup.find("last") - with pytest.raises(KeyError): - backup.find("fdasfdka") - - -def test_load_revisions(backup_with_revisions): - a = backup_with_revisions - assert [x.uuid for x in a.history] == ["123-0", "123-1", "123-2"] - assert a.history[0].get_parent() is None - assert a.history[1].get_parent() is None - assert a.history[2].get_parent().uuid == "123-1" - - -def test_find_revisions(backup_with_revisions): - a = backup_with_revisions - assert a.find_revisions("all") == a.history - assert a.find_revisions("1") == [a.find("1")] - assert a.find_revisions("tag:dail") == [] - assert a.find_revisions("trust:verified") == [a.find("123-0")] - assert a.find_revisions("2..1") == [a.find("2"), a.find("1")] - assert a.find_revisions("1..2") == [a.find("2"), a.find("1")] - assert a.find_revisions("123-0..123-1") == [ - a.find("123-0"), - a.find("123-1"), - ] - assert a.find_revisions("last(tag:daily)..123-1") == [ - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("123-1..") == [a.find("123-1"), a.find("123-2")] - assert a.find_revisions("..") == a.history - assert a.find_revisions("first..last") == a.history - assert a.find_revisions("tag:weekly") == [a.find("123-0"), a.find("123-1")] - assert a.find_revisions("1, tag:weekly") == [ - a.find("123-1"), - a.find("123-0"), - ] - assert a.find_revisions("0,2..1") == [ - a.find("123-2"), - a.find("123-0"), - a.find("123-1"), - ] - assert a.find_revisions("2,1, 2,0,1") == [ - a.find("123-0"), - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("2015-09-01..2015-08-30") == [ - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("2015-08-30..last(last(tag:daily&clean))") == [ - a.find("123-1"), - ] - assert a.find_revisions("2015-08-30..,trust:verified") == [ - a.find("123-1"), - a.find("123-2"), - a.find("123-0"), - ] - assert a.find_revisions( - "first(trust:verified)..last(reverse(2015-08-30..))" - ) == [ - a.find("123-0"), - a.find("123-1"), - ] - assert a.find_revisions("reverse(not(clean))") == [ - a.find("123-2"), - ] - assert a.find_revisions("last(reverse(first(123-1, 123-0)))") == [ - a.find("123-1"), - ] - assert a.find_revisions("( (first( (123-0, 123-1)) ))") == [ - a.find("123-0"), - ] - assert a.find_revisions("server:aaaa") == [] - assert a.find_revisions("server:remote1") == [ - a.find("123-1"), - a.find("123-2"), - ] - assert a.find_revisions("local") == [ - a.find("123-0"), - ] - assert a.find_revisions("remote") == [ - a.find("123-1"), - a.find("123-2"), - ] - - -def test_find_revisions_should_raise_invalid_spec(backup_with_revisions): - a = backup_with_revisions - with pytest.raises(KeyError): - a.find_revisions("aaaa..125") - with pytest.raises(AssertionError): - a.find_revisions("last)..5") - with 
pytest.raises(KeyError): - a.find_revisions("clean-..,1") - with pytest.raises(KeyError): - a.find_revisions("123-") - with pytest.raises(IndexError): - a.find_revisions("first(not(all))") - with pytest.raises(KeyError): - a.find_revisions("2015-09..2015-08-30") - - -def test_find_revision(backup_with_revisions): - a = backup_with_revisions - assert a.find("last").uuid == "123-2" - with pytest.raises(KeyError): - a.find("-1") - assert a.find("0").uuid == "123-2" - assert a.find("1").uuid == "123-1" - assert a.find("2").uuid == "123-0" - - assert a.find("123-1").uuid == "123-1" - with pytest.raises(KeyError): - a.find("125-125") - - assert a.find("last(tag:daily)").uuid == "123-2" - assert a.find("last(tag:weekly)").uuid == "123-1" - assert a.find("last(tag:monthly)").uuid == "123-0" - assert a.find(" first( tag:monthly ) ").uuid == "123-0" - - -def test_get_history(backup_with_revisions): - assert 2 == len(backup_with_revisions.clean_history) - assert ( - backup_with_revisions.clean_history - == backup_with_revisions.get_history(clean=True) - ) - assert 1 == len(backup_with_revisions.local_history) - assert ( - backup_with_revisions.local_history - == backup_with_revisions.get_history(local=True) - ) - assert 1 == len(backup_with_revisions.get_history(clean=True, local=True)) - - -def test_ignore_duplicates(backup_with_revisions, tmp_path): - shutil.copy(str(tmp_path / "123-2.rev"), str(tmp_path / "123-3.rev")) - a = backup_with_revisions - a.scan() - assert 3 == len(a.history) diff --git a/src/backy/tests/test_backup.py b/src/backy/tests/test_backup.py index e2f68b21..966eb0e0 100644 --- a/src/backy/tests/test_backup.py +++ b/src/backy/tests/test_backup.py @@ -1,118 +1,213 @@ -import os.path -import subprocess -from unittest import mock +import shutil import pytest -import backy.utils from backy.revision import Revision -from backy.sources.file import File -from backy.utils import CHUNK_SIZE -def test_config(simple_file_config, tmp_path): - backup = simple_file_config - - assert backup.path == tmp_path - assert isinstance(backup.source, File) - assert backup.source.filename == "input-file" - - -def test_find(simple_file_config, tmp_path, log): - backup = simple_file_config - rev = Revision.create(backup, set(), log, uuid="123-456") - rev.materialize() - backup.scan() - assert tmp_path / "123-456" == backup.find("0").filename +@pytest.fixture +def repository_with_revisions(repository, tmp_path): + with open(str(tmp_path / "123-0.rev"), "wb") as f: + f.write( + b"""\ +uuid: 123-0 +timestamp: 2015-08-29 00:00:00+00:00 +parent: +trust: verified +stats: {bytes_written: 14868480, duration: 31.1} +tags: [daily, weekly, monthly] +""" + ) + with open(str(tmp_path / "123-1.rev"), "wb") as f: + f.write( + b"""\ +uuid: 123-1 +timestamp: 2015-08-30 01:00:00+00:00 +parent: 123-0 +stats: {bytes_written: 1486880, duration: 3.7} +server: remote1 +tags: [daily, weekly] +""" + ) + with open(str(tmp_path / "123-2.rev"), "wb") as f: + f.write( + b"""\ +uuid: 123-2 +timestamp: 2015-08-30 02:00:00+00:00 +parent: 123-1 +stats: {} +server: remote1 +tags: [daily] +""" + ) + repository.scan() + return repository + + +def test_empty_revisions(repository): + assert repository.history == [] + + +def test_find_revision_empty(repository): + with pytest.raises(KeyError): + repository.find("-1") + with pytest.raises(KeyError): + repository.find("last") + with pytest.raises(KeyError): + repository.find("fdasfdka") + + +def test_load_revisions(repository_with_revisions): + a = repository_with_revisions + assert [x.uuid for 
x in a.history] == ["123-0", "123-1", "123-2"] + assert a.history[0].get_parent() is None + assert a.history[1].get_parent() is None + assert a.history[2].get_parent().uuid == "123-1" + + +def test_find_revisions(repository_with_revisions): + a = repository_with_revisions + assert a.find_revisions("all") == a.history + assert a.find_revisions("1") == [a.find("1")] + assert a.find_revisions("tag:dail") == [] + assert a.find_revisions("trust:verified") == [a.find("123-0")] + assert a.find_revisions("2..1") == [a.find("2"), a.find("1")] + assert a.find_revisions("1..2") == [a.find("2"), a.find("1")] + assert a.find_revisions("123-0..123-1") == [ + a.find("123-0"), + a.find("123-1"), + ] + assert a.find_revisions("last(tag:daily)..123-1") == [ + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("123-1..") == [a.find("123-1"), a.find("123-2")] + assert a.find_revisions("..") == a.history + assert a.find_revisions("first..last") == a.history + assert a.find_revisions("tag:weekly") == [a.find("123-0"), a.find("123-1")] + assert a.find_revisions("1, tag:weekly") == [ + a.find("123-1"), + a.find("123-0"), + ] + assert a.find_revisions("0,2..1") == [ + a.find("123-2"), + a.find("123-0"), + a.find("123-1"), + ] + assert a.find_revisions("2,1, 2,0,1") == [ + a.find("123-0"), + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("2015-09-01..2015-08-30") == [ + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("2015-08-30..last(last(tag:daily&clean))") == [ + a.find("123-1"), + ] + assert a.find_revisions("2015-08-30..,trust:verified") == [ + a.find("123-1"), + a.find("123-2"), + a.find("123-0"), + ] + assert a.find_revisions( + "first(trust:verified)..last(reverse(2015-08-30..))" + ) == [ + a.find("123-0"), + a.find("123-1"), + ] + assert a.find_revisions("reverse(not(clean))") == [ + a.find("123-2"), + ] + assert a.find_revisions("last(reverse(first(123-1, 123-0)))") == [ + a.find("123-1"), + ] + assert a.find_revisions("( (first( (123-0, 123-1)) ))") == [ + a.find("123-0"), + ] + assert a.find_revisions("server:aaaa") == [] + assert a.find_revisions("server:remote1") == [ + a.find("123-1"), + a.find("123-2"), + ] + assert a.find_revisions("local") == [ + a.find("123-0"), + ] + assert a.find_revisions("remote") == [ + a.find("123-1"), + a.find("123-2"), + ] + + +def test_find_revisions_should_raise_invalid_spec(repository_with_revisions): + a = repository_with_revisions + with pytest.raises(KeyError): + a.find_revisions("aaaa..125") + with pytest.raises(AssertionError): + a.find_revisions("last)..5") + with pytest.raises(KeyError): + a.find_revisions("clean-..,1") + with pytest.raises(KeyError): + a.find_revisions("123-") + with pytest.raises(IndexError): + a.find_revisions("first(not(all))") + with pytest.raises(KeyError): + a.find_revisions("2015-09..2015-08-30") -def test_find_should_raise_if_not_found(simple_file_config, log): - backup = simple_file_config - rev = Revision.create(backup, set(), log) - rev.materialize() - backup.scan() +def test_find_revision(repository_with_revisions): + a = repository_with_revisions + assert a.find("last").uuid == "123-2" with pytest.raises(KeyError): - backup.find("no such revision") - - -def test_restore_target(simple_file_config): - backup = simple_file_config - source = "input-file" - target = "restore.img" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) - backup.restore("0", target) - with open(source, "rb") as s, open(target, "rb") as t: - assert s.read() == t.read() - 
- -def test_restore_stdout(simple_file_config, capfd): - backup = simple_file_config - source = "input-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) - backup.restore("0", "-") - assert not os.path.exists("-") - out, err = capfd.readouterr() - assert "volume contents\n" == out - - -def test_restore_backy_extract(simple_file_config, monkeypatch): - check_output = mock.Mock(return_value="backy-extract 1.1.0") - monkeypatch.setattr(subprocess, "check_output", check_output) - backup = simple_file_config - backup.restore_backy_extract = mock.Mock() - source = "input-file" - with open(source, "wb") as f: - f.write(b"a" * CHUNK_SIZE) - backup.backup({"daily"}) - backup.restore("0", "restore.img") - check_output.assert_called() - backup.restore_backy_extract.assert_called_once_with( - backup.find("0"), "restore.img" - ) + a.find("-1") + assert a.find("0").uuid == "123-2" + assert a.find("1").uuid == "123-1" + assert a.find("2").uuid == "123-0" + assert a.find("123-1").uuid == "123-1" + with pytest.raises(KeyError): + a.find("125-125") -def test_backup_corrupted(simple_file_config): - backup = simple_file_config - source = "input-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) + assert a.find("last(tag:daily)").uuid == "123-2" + assert a.find("last(tag:weekly)").uuid == "123-1" + assert a.find("last(tag:monthly)").uuid == "123-0" + assert a.find(" first( tag:monthly ) ").uuid == "123-0" - store = backup.history[0].backend.store - chunk_path = store.chunk_path(next(iter(store.seen))) - os.chmod(chunk_path, 0o664) - with open(chunk_path, "wb") as f: - f.write(b"invalid") - backup.backup({"daily"}) - assert backup.history == [] - assert not os.path.exists(chunk_path) +def test_get_history(repository_with_revisions): + assert 2 == len(repository_with_revisions.clean_history) + assert ( + repository_with_revisions.clean_history + == repository_with_revisions.get_history(clean=True) + ) + assert 1 == len(repository_with_revisions.local_history) + assert ( + repository_with_revisions.local_history + == repository_with_revisions.get_history(local=True) + ) + assert 1 == len( + repository_with_revisions.get_history(clean=True, local=True) + ) -def test_restore_mixed_backend(simple_file_config): - backup = simple_file_config - backup.default_backend_type = "cowfile" - source = "input-file" - out = "output-file" - with open(source, "wb") as f: - f.write(b"volume contents\n") - backup.backup({"daily"}) +def test_ignore_duplicates(repository_with_revisions, tmp_path): + shutil.copy(str(tmp_path / "123-2.rev"), str(tmp_path / "123-3.rev")) + a = repository_with_revisions + a.scan() + assert 3 == len(a.history) - with open(source, "wb") as f: - f.write(b"meow\n") - backup.default_backend_type = "chunked" - backup.backup({"daily"}) - assert len(backup.history) == 2 +def test_find(repository, log): + rev = Revision.create(repository, set(), log, uuid="123-456") + rev.materialize() + repository.scan() + assert "123-456" == repository.find("0").uuid - backup.restore("1", out) - with open(out, "rb") as f: - assert f.read() == b"volume contents\n" - backup.restore("0", out) - with open(out, "rb") as f: - assert f.read() == b"meow\n" +def test_find_should_raise_if_not_found(repository, log): + rev = Revision.create(repository, set(), log) + rev.materialize() + repository.scan() + with pytest.raises(KeyError): + repository.find("no such revision") diff --git a/src/backy/tests/test_backy.py b/src/backy/tests/test_backy.py 
deleted file mode 100644 index 5a1065c5..00000000 --- a/src/backy/tests/test_backy.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import subprocess - -import pytest - -import backy.backup -from backy.ext_deps import BACKY_CMD, BASH -from backy.tests import Ellipsis - - -def generate_test_data(target, size, marker): - f = open(target, "wb") - block = 8 * 1024 - for chunk in range(size // block): - f.write(marker * block) - f.write(marker * (size % block)) - f.close() - - -def test_smoketest_internal(tmp_path, log): - # These copies of data are intended to be different versions of the same - # file. - source1 = str(tmp_path / "image1.qemu") - generate_test_data(source1, 2 * 1024**2, b"1") - source2 = str(tmp_path / "image2.qemu") - generate_test_data(source2, 2 * 1024**2, b"2") - source3 = str(tmp_path / "image3.qemu") - generate_test_data(source3, 2 * 1024**2, b"3") - - backup_dir = tmp_path / "image1.backup" - os.mkdir(str(backup_dir)) - with open(str(backup_dir / "config"), "wb") as f: - f.write( - ( - "{'source': {'type': 'file', 'filename': '%s'}," - "'schedule': {'daily': {'interval': '1d', 'keep': 7}}}" - % source1 - ).encode("utf-8") - ) - backup = backy.backup.Backup(backup_dir, log) - - # Backup first state - backup.backup({"manual:test"}) - - # Restore first state form newest revision at position 0 - restore_target = str(tmp_path / "image1.restore") - backup.restore("0", restore_target) - with pytest.raises(IOError): - open(backup.history[-1].filename, "wb") - with pytest.raises(IOError): - open(backup.history[-1].info_filename, "wb") - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - # Backup second state - backup.source.filename = source2 - backup.backup({"test"}, force=True) - assert len(backup.history) == 2 - - # Restore second state from second backup which is the newest at position 0 - backup.restore("0", restore_target) - d1 = open(source2, "rb").read() - d2 = open(restore_target, "rb").read() - assert d1 == d2 - - # Our original backup is now at position 1. Lets restore that again. - backup.restore("1", restore_target) - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - # Backup second state again - backup.source.filename = source2 - backup.backup({"manual:test"}) - assert len(backup.history) == 3 - - # Restore image2 from its most recent at position 0 - backup.restore("0", restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - # Restore image2 from its previous backup, now at position 1 - backup.restore("1", restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - # Our original backup is now at position 2. Lets restore that again. 
- backup.restore("2", restore_target) - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - # Backup third state - backup.source.filename = source3 - backup.backup({"test"}, True) - assert len(backup.history) == 4 - - # Restore image3 from the most curent state - backup.restore("0", restore_target) - assert open(source3, "rb").read() == open(restore_target, "rb").read() - - # Restore image2 from position 1 and 2 - backup.restore("1", restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - backup.restore("2", restore_target) - assert open(source2, "rb").read() == open(restore_target, "rb").read() - - # Restore image1 from position 3 - backup.restore("3", restore_target) - assert open(source1, "rb").read() == open(restore_target, "rb").read() - - -@pytest.mark.slow -def test_smoketest_external(): - output = subprocess.check_output( - [BASH, os.path.dirname(__file__) + "/../../../smoketest.sh"], - env=os.environ | {"BACKY_CMD": BACKY_CMD}, - ) - output = output.decode("utf-8") - assert ( - Ellipsis( - """\ -Using /... as workspace. -Generating Test Data.. Done. -Backing up img_state1.img. Done. -Backing up img_state1.img with unknown tag. Done. -Restoring img_state1.img from level 0. Done. -Diffing restore_state1.img against img_state1.img. Success. -Backing up img_state2.img. Done. -Restoring img_state2.img from level 0. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state1.img from level 1. Done. -Diffing restore_state1.img against img_state1.img. Success. -Backing up img_state2.img again. Done. -Restoring img_state2.img from level 0. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state2.img from level 1. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state1.img from level 2. Done. -Diffing restore_state1.img against img_state1.img. Success. -Backing up img_state3.img. Done. -Restoring img_state3.img from level 0. Done. -Diffing restore_state3.img against img_state3.img. Success. -Restoring img_state2.img from level 1. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state2.img from level 2. Done. -Diffing restore_state2.img against img_state2.img. Success. -Restoring img_state1.img from level 3. Done. -Diffing restore_state1.img against img_state1.img. Success. -┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ -┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ -┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ -┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ -│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ daily │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... │ │ │ │ │ │ │ -└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ -4 revisions containing 2.0 MiB data (estimated) -┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ -┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ -┃ ... ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ -┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ -│ ... │ ... │ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... │ 512.0 KiB │ a moment │ test │ trusted │ │ -│ ... │ │ │ │ │ │ │ -│ ... │ ... 
│ 512.0 KiB │ a moment │ manual:te… │ trusted │ │ -│ ... │ │ │ │ │ │ │ -└───────────┴───────────┴───────────┴──────────┴────────────┴─────────┴────────┘ -3 revisions containing 1.5 MiB data (estimated) -""" - ) - == output - ) diff --git a/src/backy/tests/test_fallocate.py b/src/backy/tests/test_fallocate.py deleted file mode 100644 index a6db905d..00000000 --- a/src/backy/tests/test_fallocate.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest - -import backy.fallocate - - -@pytest.fixture -def testfile(tmp_path): - fn = str(tmp_path / "myfile") - with open(fn, "wb") as f: - f.write(b"\xde\xad\xbe\xef" * 32) - return fn - - -def test_punch_hole(testfile): - with open(testfile, "r+b") as f: - f.seek(0) - backy.fallocate.punch_hole(f, 2, 4) - f.seek(0) - assert f.read(8) == b"\xde\xad\x00\x00\x00\x00\xbe\xef" - - -def test_punch_hole_needs_length(testfile): - with pytest.raises(IOError): - with open(testfile, "r+b") as f: - backy.fallocate.punch_hole(f, 10, 0) - - -def test_punch_hole_needs_writable_file(testfile): - with pytest.raises(OSError): - with open(testfile, "rb") as f: - backy.fallocate.punch_hole(f, 0, 1) - - -def test_punch_hole_needs_nonnegative_offset(testfile): - with pytest.raises(OSError): - with open(testfile, "r+b") as f: - backy.fallocate.punch_hole(f, -1, 1) - - -def test_fake_fallocate_only_punches_holes(testfile): - with pytest.raises(NotImplementedError): - with open(testfile, "r+b") as f: - backy.fallocate._fake_fallocate(f, 0, 0, 10) diff --git a/src/backy/tests/test_main.py b/src/backy/tests/test_main.py deleted file mode 100644 index 46d20333..00000000 --- a/src/backy/tests/test_main.py +++ /dev/null @@ -1,567 +0,0 @@ -import datetime -import os -import pprint -import sys -from functools import partialmethod - -import pytest - -import backy.backup -import backy.client -import backy.main -from backy import utils -from backy.revision import Revision -from backy.tests import Ellipsis - - -@pytest.fixture -def argv(): - original = sys.argv - new = original[:1] - sys.argv = new - yield new - sys.argv = original - - -def test_display_usage(capsys, argv): - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - """\ -usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] - {client,backup,restore,purge,find,status,\ -upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} - ... -""" - == out - ) - assert err == "" - - -def test_display_client_usage(capsys, argv): - argv.append("client") - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - """\ -usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] - {jobs,status,run,runall,reload,check} ... -""" - == out - ) - assert err == "" - - -def test_display_help(capsys, argv): - argv.append("--help") - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -usage: pytest [-h] [-v] [-l LOGFILE] [-b BACKUPDIR] [-t TASKID] - {client,backup,restore,purge,find,status,\ -upgrade,scheduler,distrust,verify,forget,tags,expire,push,pull} - ... - -Backup and restore for block devices. - -positional arguments: -... 
-""" - ) - == out - ) - assert err == "" - - -def test_display_client_help(capsys, argv): - argv.extend(["client", "--help"]) - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -usage: pytest client [-h] [-c CONFIG] [-p PEER] [--url URL] [--token TOKEN] - {jobs,status,run,runall,reload,check} ... - -positional arguments: -... -""" - ) - == out - ) - assert err == "" - - -def test_verbose_logging(capsys, argv): - # This is just a smoke test to ensure the appropriate code path - # for -v is covered. - argv.extend(["-v"]) - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - - -def print_args(*args, return_value=None, **kw): - print(args) - pprint.pprint(kw) - return return_value - - -async def async_print_args(*args, **kw): - print_args(*args, **kw) - - -def test_call_status(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.main.Command, "status", print_args) - argv.extend(["-v", "-b", str(backup.path), "status"]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{'revision': 'all', 'yaml_': False} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D command/invoked args='... -v -b ... status' -... D command/parsed func='status' func_args={'yaml_': False, 'revision': 'all'} -... D command/successful \n\ -""" - ) - == utils.log_data - ) - - -@pytest.mark.parametrize("success", [False, True]) -def test_call_backup(success, tmp_path, capsys, argv, monkeypatch): - os.makedirs(tmp_path / "backy") - os.chdir(tmp_path / "backy") - - with open(tmp_path / "backy" / "config", "wb") as f: - f.write( - """ ---- -schedule: - daily: - interval: 1d - keep: 7 -source: - type: file - filename: {} -""".format( - __file__ - ).encode( - "utf-8" - ) - ) - - monkeypatch.setattr( - backy.backup.Backup, - "backup", - partialmethod(print_args, return_value=success), - ) - argv.extend(["-v", "backup", "manual:test"]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.main.main() - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(, {'manual:test'}, False) -{} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D command/invoked args='... -v backup manual:test' -... D command/parsed func='backup' func_args={{'force': False, 'tags': 'manual:test'}} -... D quarantine/scan entries=0 -... D command/return-code code={int(not success)} -""" - ) - == utils.log_data - ) - assert exit.value.code == int(not success) - - -def test_call_find(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.main.Command, "find", print_args) - argv.extend(["-v", "-b", str(backup.path), "find", "-r", "1"]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{'revision': '1', 'uuid': False} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D command/invoked args='... -v -b ... find -r 1' -... D command/parsed func='find' func_args={'uuid': False, 'revision': '1'} -... 
D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -@pytest.mark.parametrize( - ["action", "args"], - [ - ("jobs", {"filter_re": "test"}), - ("status", dict()), - ("run", {"job": "test"}), - ("runall", dict()), - ("reload", dict()), - ("check", dict()), - ], -) -def test_call_client( - capsys, backup, argv, monkeypatch, log, tmp_path, action, args -): - monkeypatch.setattr(backy.client.CLIClient, action, async_print_args) - conf = str(tmp_path / "conf") - with open(conf, "w") as c: - c.write( - f"""\ -global: - base-dir: {str(tmp_path)} -api: - addrs: "127.0.0.1, ::1" - port: 1234 - cli-default: - token: "test" - -schedules: {{}} -jobs: {{}} -""" - ) - - argv.extend(["-v", "client", "-c", conf, action, *args.values()]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - f"""\ -(,) -{args} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D command/invoked args='... -v client -c ... {action}{" "*bool(args)}{", ".join(args.values())}' -... D command/parsed func='client' func_args={{'config': PosixPath('...'), 'peer': None, \ -'url': None, 'token': None{", "*bool(args)}{str(args)[1:-1]}, 'apifunc': '{action}'}} -... D daemon/read-config ... -... D command/return-code code=0 -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -def test_call_scheduler(capsys, backup, argv, monkeypatch, tmp_path): - monkeypatch.setattr(backy.main.Command, "scheduler", print_args) - argv.extend( - [ - "-v", - "-b", - str(backup.path), - "-l", - str(tmp_path / "backy.log"), - "scheduler", - ] - ) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{'config': PosixPath('/etc/backy.conf')} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D command/invoked args='... -v -b ... scheduler' -... D command/parsed func='scheduler' func_args={'config': PosixPath('/etc/backy.conf')} -... D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -@pytest.mark.parametrize("action", ["set", "add", "remove"]) -def test_call_tags(capsys, backup, argv, monkeypatch, action): - monkeypatch.setattr(backy.main.Command, "tags", print_args) - argv.extend( - ["-v", "-b", str(backup.path), "tags", action, "-r", "last", "manual:a"] - ) - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - f"""\ -(,) -{{'action': '{action}', - 'autoremove': False, - 'expect': None, - 'force': False, - 'revision': 'last', - 'tags': 'manual:a'}} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D quarantine/scan entries=0 -... D command/invoked args='... -v -b ... tags {action} -r last manual:a' -... D command/parsed func='tags' func_args={{'autoremove': False, 'force': False, 'expect': None, \ -'action': '{action}', 'revision': 'last', 'tags': 'manual:a'}} -... 
D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -def test_call_expire(capsys, backup, argv, monkeypatch): - monkeypatch.setattr(backy.main.Command, "expire", print_args) - argv.extend(["-v", "-b", str(backup.path), "expire"]) - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - """\ -(,) -{} -""" - ) - == out - ) - assert ( - Ellipsis( - """\ -... D quarantine/scan entries=0 -... D command/invoked args='... -v -b ... expire' -... D command/parsed func='expire' func_args={} -... D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -@pytest.mark.parametrize("action", ["pull", "push"]) -def test_call_pull_push(capsys, backup, argv, monkeypatch, tmp_path, action): - monkeypatch.setattr(backy.main.Command, action, print_args) - conf = tmp_path / "conf" - with open(conf, "w") as c: - c.write( - f"""\ -global: - base-dir: {str(tmp_path)} -api: - addrs: "127.0.0.1, ::1" - port: 1234 - cli-default: - token: "test" -peers : {{}} -schedules: {{}} -jobs: {{}} -""" - ) - - argv.extend(["-v", "-b", str(backup.path), action, "-c", str(conf)]) - utils.log_data = "" - with pytest.raises(SystemExit) as exit: - backy.main.main() - assert exit.value.code == 0 - out, err = capsys.readouterr() - assert ( - Ellipsis( - f"""\ -(,) -{{'config': {repr(conf)}}} -""" - ) - == out - ) - assert ( - Ellipsis( - f"""\ -... D command/invoked args='... -v -b {backup.path} {action} -c {conf}' -... D command/parsed func='{action}' func_args={{'config': {repr(conf)}}} -... D command/successful \n\ -""" - ) - == utils.log_data - ) - assert exit.value.code == 0 - - -def test_call_unexpected_exception( - capsys, backup, argv, monkeypatch, log, tmp_path -): - def do_raise(*args, **kw): - raise RuntimeError("test") - - monkeypatch.setattr(backy.main.Command, "status", do_raise) - import os - - monkeypatch.setattr(os, "_exit", lambda x: None) - - argv.extend( - ["-l", str(tmp_path / "backy.log"), "-b", str(backup.path), "status"] - ) - utils.log_data = "" - with pytest.raises(SystemExit): - backy.main.main() - out, err = capsys.readouterr() - assert "" == out - assert ( - Ellipsis( - """\ -... D command/invoked args='... -l ... -b ... status' -... D command/parsed func='status' func_args={'yaml_': False, 'revision': 'all'} -... 
E command/failed exception_class='builtins.RuntimeError' exception_msg='test' -exception>\tTraceback (most recent call last): -exception>\t File ".../src/backy/main.py", line ..., in main -exception>\t ret = func(**func_args) -exception>\t File ".../src/backy/tests/test_main.py", line ..., in do_raise -exception>\t raise RuntimeError("test") -exception>\tRuntimeError: test -""" - ) - == utils.log_data - ) - - -def test_commands_wrapper_status( - backup, tmp_path, capsys, clock, tz_berlin, log -): - commands = backy.main.Command(tmp_path, "AAAA", log) - - revision1 = Revision.create(backup, {"daily"}, log, uuid="1") - revision1.materialize() - - revision2 = Revision.create(backup, {"daily"}, log, uuid="2") - revision2.timestamp = backy.utils.now() + datetime.timedelta(hours=1) - revision2.server = "remote" - revision2.orig_tags = {"daily"} - revision2.materialize() - - revision3 = Revision.create(backup, {"new", "same"}, log, uuid="3") - revision3.timestamp = backy.utils.now() + datetime.timedelta(hours=2) - revision3.server = "remote" - revision3.orig_tags = {"old", "same"} - revision3.materialize() - - commands.status(yaml_=False, revision="all") - out, err = capsys.readouterr() - - assert err == "" - assert out == Ellipsis( - """\ -┏━━━━━━━━━━━━━━━━┳━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ -┃ Date ┃ ┃ ┃ ┃ ┃ ┃ ┃ -┃ (Europe/Berli… ┃ ID ┃ Size ┃ Duration ┃ Tags ┃ Trust ┃ Server ┃ -┡━━━━━━━━━━━━━━━━╇━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ -│ 2015-09-01 │ 1 │ 0 Bytes │ - │ daily │ trusted │ │ -│ 09:06:47 │ │ │ │ │ │ │ -│ 2015-09-01 │ 2 │ 0 Bytes │ - │ daily │ trusted │ remote │ -│ 10:06:47 │ │ │ │ │ │ │ -│ 2015-09-01 │ 3 │ 0 Bytes │ - │ +new,-old,same │ trusted │ remote │ -│ 11:06:47 │ │ │ │ │ │ │ -└────────────────┴────┴─────────┴──────────┴────────────────┴─────────┴────────┘ -3 revisions containing 0 Bytes data (estimated) -1 pending change(s) (Push changes with `backy push`) -""" - ) - - -def test_commands_wrapper_status_yaml( - backup, tmp_path, capsys, clock, tz_berlin, log -): - commands = backy.main.Command(tmp_path, "AAAA", log) - - revision = Revision.create(backup, set(), log, uuid="1") - revision.stats["duration"] = 3.5 - revision.stats["bytes_written"] = 42 - revision.materialize() - - commands.status(yaml_=True, revision="all") - out, err = capsys.readouterr() - - assert err == "" - assert ( - out - == f"""\ -- backend_type: {backup.default_backend_type} - orig_tags: [] - parent: '' - server: '' - stats: - bytes_written: 42 - duration: 3.5 - tags: [] - timestamp: 2015-09-01 07:06:47+00:00 - trust: trusted - uuid: '1' - -""" - ) diff --git a/src/backy/tests/test_quarantine.py b/src/backy/tests/test_report.py similarity index 57% rename from src/backy/tests/test_quarantine.py rename to src/backy/tests/test_report.py index ddf9da71..9f4dd2a2 100644 --- a/src/backy/tests/test_quarantine.py +++ b/src/backy/tests/test_report.py @@ -1,26 +1,27 @@ -from backy.quarantine import QuarantineReport, QuarantineStore +from backy.report import ChunkMismatchReport from backy.tests import Ellipsis -def test_quarantine(tmp_path, log, clock): - store = QuarantineStore(tmp_path, log) - store.add_report(QuarantineReport(b"source", b"target", 3)) +def test_report(tmp_path, repository, log, clock): + repository.add_report(ChunkMismatchReport(b"source", b"target", 3)) with open( - (tmp_path / "quarantine" / store.report_ids[0]).with_suffix(".report") + (tmp_path / "quarantine" / repository.report_ids[0]).with_suffix( + ".report" + ) ) as report: assert ( 
Ellipsis( f"""\ -uuid: {store.report_ids[0]} +uuid: {repository.report_ids[0]} +timestamp: 2015-09-01 07:06:47+00:00 source_hash: 36cd38f49b9afa08222c0dc9ebfe35eb target_hash: 42aefbae01d2dfd981f7da7d823d689e offset: 3 -timestamp: 2015-09-01 07:06:47+00:00 traceback: |- ... - File ".../src/backy/tests/test_quarantine.py", line ..., in test_quarantine - store.add_report(QuarantineReport(b"source", b"target", 3)) - File ".../src/backy/quarantine.py", line ..., in __init__ + File ".../src/backy/tests/test_report.py", line ..., in test_report + repository.add_report(ChunkMismatchReport(b"source", b"target", 3)) + File ".../src/backy/report.py", line ..., in __init__ self.traceback = "".join(traceback.format_stack()).strip() """ ) diff --git a/src/backy/tests/test_repository.py b/src/backy/tests/test_repository.py new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/src/backy/tests/test_repository.py @@ -0,0 +1 @@ +# TODO diff --git a/src/backy/tests/test_revision.py b/src/backy/tests/test_revision.py index 498a6e7b..dd7b1407 100644 --- a/src/backy/tests/test_revision.py +++ b/src/backy/tests/test_revision.py @@ -11,63 +11,61 @@ SAMPLE_DIR = Path(__file__).parent.joinpath("samples") -def test_revision_base(backup, log): - revision = Revision.create(backup, set(), log, uuid="uuid") +def test_revision_base(repository, log): + revision = Revision.create(repository, set(), log, uuid="uuid") assert revision.uuid == "uuid" - assert revision.backup is backup + assert revision.repository is repository -def test_revision_create(backup, log): - backup.history = [] - r = Revision.create(backup, {"1", "2"}, log) +def test_revision_create(repository, log): + repository.history = [] + r = Revision.create(repository, {"1", "2"}, log) assert r.uuid is not None assert r.tags == {"1", "2"} assert (backy.utils.now() - r.timestamp).total_seconds() < 10 - assert r.backup is backup + assert r.repository is repository -def test_revision_create_child(backup, log): - backup.history = [Revision.create(backup, set(), log, uuid="asdf")] - r = Revision.create(backup, {"test"}, log) +def test_revision_create_child(repository, log): + repository.history = [Revision.create(repository, set(), log, uuid="asdf")] + r = Revision.create(repository, {"test"}, log) assert r.uuid is not None assert r.tags == {"test"} assert r.get_parent().uuid == "asdf" assert (backy.utils.now() - r.timestamp).total_seconds() < 10 - assert r.backup is backup + assert r.repository is repository -def test_load_sample1(backup, log): - r = Revision.load(SAMPLE_DIR / "sample1.rev", backup, log) +def test_load_sample1(repository, log): + r = Revision.load(SAMPLE_DIR / "sample1.rev", repository, log) assert r.uuid == "asdf" assert r.timestamp == datetime.datetime(2015, 8, 1, 20, 0, tzinfo=UTC) assert r.get_parent() is None - assert r.backup is backup + assert r.repository is repository -def test_load_sample2(backup, log): - r = Revision.load(SAMPLE_DIR / "sample2.rev", backup, log) +def test_load_sample2(repository, log): + r = Revision.load(SAMPLE_DIR / "sample2.rev", repository, log) assert r.uuid == "asdf2" assert r.timestamp == datetime.datetime(2015, 8, 1, 21, 0, tzinfo=UTC) assert r.get_parent() is None - assert r.backup is backup + assert r.repository is repository def test_filenames_based_on_uuid_and_backup_dir(log): backup = mock.Mock() backup.path = Path("/srv/backup/foo") r = Revision.create(backup, set(), log, uuid="asdf") - assert r.filename == Path("/srv/backup/foo/asdf") assert r.info_filename == Path("/srv/backup/foo/asdf.rev") 
-def test_store_revision_data(backup, clock, log): - backup.history = [Revision.create(backup, set(), log, uuid="asdf")] - r = Revision.create(backup, set(), log, uuid="asdf2") +def test_store_revision_data(repository, clock, log): + repository.history = [Revision.create(repository, set(), log, uuid="asdf")] + r = Revision.create(repository, set(), log, uuid="asdf2") r.write_info() with open(r.info_filename, encoding="utf-8") as info: assert yaml.safe_load(info) == { "parent": "asdf", - "backend_type": backup.default_backend_type, "uuid": "asdf2", "stats": {"bytes_written": 0}, "tags": [], @@ -78,13 +76,12 @@ def test_store_revision_data(backup, clock, log): } -def test_store_revision_data_no_parent(backup, clock, log): - r = Revision.create(backup, set(), log, uuid="asdf2") +def test_store_revision_data_no_parent(repository, clock, log): + r = Revision.create(repository, set(), log, uuid="asdf2") r.write_info() with open(r.info_filename, encoding="utf-8") as info: assert yaml.safe_load(info) == { "parent": "", - "backend_type": backup.default_backend_type, "uuid": "asdf2", "stats": {"bytes_written": 0}, "tags": [], @@ -95,15 +92,9 @@ def test_store_revision_data_no_parent(backup, clock, log): } -def test_delete_revision(backup, log): - r = Revision.create(backup, set(), log, uuid="123-456") +def test_delete_revision(repository, log): + r = Revision.create(repository, set(), log, uuid="123-456") r.materialize() - assert backup.path.joinpath("123-456.rev").exists() - backup.scan() - backup.path.joinpath("123-456").open("w") - assert backup.path.joinpath("123-456.rev").exists() + assert repository.path.joinpath("123-456.rev").exists() r.remove() - # Ensure the revision data file exists - we do not implicitly create - # it any longer. - assert not backup.path.joinpath("123-456").exists() - assert not backup.path.joinpath("123-456.rev").exists() + assert not repository.path.joinpath("123-456.rev").exists() diff --git a/src/backy/tests/test_schedule.py b/src/backy/tests/test_schedule.py index 710e8b58..01de3a00 100644 --- a/src/backy/tests/test_schedule.py +++ b/src/backy/tests/test_schedule.py @@ -21,7 +21,7 @@ def test_parse_duration(): def test_first_backup_catches_up_all_tags_immediately_in_next_interval( - schedule, backup, clock + schedule, repository, clock ): schedule.configure( { @@ -32,17 +32,17 @@ def test_first_backup_catches_up_all_tags_immediately_in_next_interval( assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily", "test"}, - ) == schedule.next(backy.utils.now(), 1, backup) + ) == schedule.next(backy.utils.now(), 1, repository) -def test_tag_first_interval_after_now(schedule, backup, clock): +def test_tag_first_interval_after_now(schedule, repository, clock): assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily"}, ) == schedule._next_ideal(backy.utils.now(), 1) -def test_tag_second_interval_after_now(schedule, backup, clock): +def test_tag_second_interval_after_now(schedule, repository, clock): assert ( datetime(2015, 9, 3, 0, 0, 5, tzinfo=UTC), {"daily"}, @@ -51,7 +51,7 @@ def test_tag_second_interval_after_now(schedule, backup, clock): ) -def test_tag_second_interval_with_different_spread(schedule, backup, clock): +def test_tag_second_interval_with_different_spread(schedule, repository, clock): assert ( datetime(2015, 9, 3, 0, 0, 5, tzinfo=UTC), {"daily"}, @@ -60,34 +60,34 @@ def test_tag_second_interval_with_different_spread(schedule, backup, clock): ) -def test_tag_catchup_not_needed_for_recent(schedule, backup, clock): +def 
test_tag_catchup_not_needed_for_recent(schedule, repository, clock): # A recent backup does not cause catchup to be triggered. revision = mock.Mock() revision.timestamp = clock.now() - timedelta(seconds=15) revision.tags = {"daily"} revision.stats = {"duration": 10} - backup.history.append(revision) - assert set() == schedule._missed(backup) + repository.history.append(revision) + assert set() == schedule._missed(repository) # This in turn causes the main next() function to return the regular next # interval. assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily"}, - ) == schedule.next(clock.now(), 1, backup) + ) == schedule.next(clock.now(), 1, repository) def test_tag_catchup_does_not_stumble_on_adhoc_tags_in_backup( - schedule, backup, clock + schedule, repository, clock ): revision = mock.Mock() revision.timestamp = clock.now() - timedelta(seconds=15) revision.tags = {"test"} revision.stats = {"duration": 10} - backup.history.append(revision) - assert {"daily"} == schedule._missed(backup) + repository.history.append(revision) + assert {"daily"} == schedule._missed(repository) -def test_tag_catchup_until_5_minutes_before_next(schedule, backup, clock): +def test_tag_catchup_until_5_minutes_before_next(schedule, repository, clock): # If a backup has been overdue for too long, we expect the # tag to be scheduled soon anyway and we do not catch up to avoid # overload issues. @@ -96,29 +96,31 @@ def test_tag_catchup_until_5_minutes_before_next(schedule, backup, clock): revision.tags = {"daily"} revision.stats = {"duration": 10} revision.write_info() - backup.history.append(revision) - assert {"daily"} == schedule._missed(backup) + repository.history.append(revision) + assert {"daily"} == schedule._missed(repository) # This in turn causes the main next() function to return the regular next # interval. assert ( datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), {"daily"}, - ) == schedule.next(clock.now(), 1, backup) + ) == schedule.next(clock.now(), 1, repository) # As we approach the 5 minute mark before the next regular interval, # we then flip towards the ideal time. clock.now.return_value = datetime(2015, 9, 1, 23, 55, 0, tzinfo=UTC) assert (clock.now(), {"daily"}) == schedule.next( - datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, backup + datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, repository ) clock.now.return_value = datetime(2015, 9, 1, 23, 55, 1, tzinfo=UTC) assert ( datetime(2015, 9, 2, 0, 0, 1, tzinfo=UTC), {"daily"}, - ) == schedule.next(datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, backup) + ) == schedule.next( + datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), 1, repository + ) -def test_tag_catchup_needed_for_recently_missed(backup, clock): +def test_tag_catchup_needed_for_recently_missed(repository, clock): revision = mock.Mock() schedule = backy.schedule.Schedule() @@ -137,60 +139,60 @@ def test_tag_catchup_needed_for_recently_missed(backup, clock): revision.timestamp = clock.now() - timedelta(seconds=(24 * 60 * 60) * 1.2) revision.tags = {"daily"} revision.stats = {"duration": 10} - backup.history.append(revision) + repository.history.append(revision) - assert {"daily", "weekly", "hourly"} == schedule._missed(backup) + assert {"daily", "weekly", "hourly"} == schedule._missed(repository) # This in turn causes the main next() function to also # return this date. 
assert ( datetime(2015, 9, 1, 7, 6, 47, tzinfo=UTC), {"daily", "weekly", "hourly"}, - ) == schedule.next(clock.now(), 1, backup) + ) == schedule.next(clock.now(), 1, repository) def test_do_not_expire_if_less_than_keep_and_inside_keep_interval( - schedule, backup, clock, log + schedule, repository, clock, log ): def add_revision(timestamp): - revision = Revision.create(backup, {"daily"}, log) - revision.uuid = str(len(backup.history) + 1) + revision = Revision.create(repository, {"daily"}, log) + revision.uuid = str(len(repository.history) + 1) revision.timestamp = timestamp revision.materialize() - backup.history.append(revision) - backup.history.sort(key=lambda x: x.timestamp) + repository.history.append(revision) + repository.history.sort(key=lambda x: x.timestamp) return revision clock.now.return_value = datetime(2014, 5, 10, 10, 0, tzinfo=UTC) add_revision(datetime(2014, 5, 10, 10, 0, tzinfo=UTC)) - assert [] == schedule.expire(backup) - backup.scan() - assert len(backup.history) == 1 - assert backup.history[0].tags == {"daily"} + assert [] == schedule.expire(repository) + repository.scan() + assert len(repository.history) == 1 + assert repository.history[0].tags == {"daily"} add_revision(datetime(2014, 5, 9, 10, 0, tzinfo=UTC)) add_revision(datetime(2014, 5, 8, 10, 0, tzinfo=UTC)) add_revision(datetime(2014, 5, 7, 10, 0, tzinfo=UTC)) add_revision(datetime(2014, 5, 6, 10, 0, tzinfo=UTC)) - assert [] == schedule.expire(backup) - backup.scan() - assert len(backup.history) == 5 - assert [{"daily"}] * 5 == [r.tags for r in backup.history] + assert [] == schedule.expire(repository) + repository.scan() + assert len(repository.history) == 5 + assert [{"daily"}] * 5 == [r.tags for r in repository.history] # This is the one revision more than the basic 'keep' parameter # but its still within the keep*interval frame so we keep it. add_revision(datetime(2014, 5, 6, 11, 0, tzinfo=UTC)) - assert [] == schedule.expire(backup) - assert [{"daily"}] * 6 == [r.tags for r in backup.history] + assert [] == schedule.expire(repository) + assert [{"daily"}] * 6 == [r.tags for r in repository.history] # This revision is more than keep and also outside the interval. # It gets its tag removed and disappears. r = add_revision(datetime(2014, 5, 4, 11, 0, tzinfo=UTC)) - assert r.filename.with_suffix(".rev").exists() - removed = [x for x in schedule.expire(backup)] + assert r.info_filename.exists() + removed = [x for x in schedule.expire(repository)] assert [r.uuid] == [x.uuid for x in removed] - backup.scan() - assert [{"daily"}] * 6 == [rev.tags for rev in backup.history] - assert not r.filename.with_suffix(".rev").exists() + repository.scan() + assert [{"daily"}] * 6 == [rev.tags for rev in repository.history] + assert not r.info_filename.exists() # If we have manual tags, then those do not expire. 
However, the # known and unknown tag disappear but then the file remains @@ -198,14 +200,14 @@ def add_revision(timestamp): r = add_revision(datetime(2014, 5, 4, 11, 0, tzinfo=UTC)) r.tags = {"daily", "manual:test", "unknown"} r.write_info() - assert r.filename.with_suffix(".rev").exists() - expired = schedule.expire(backup) + assert r.info_filename.exists() + expired = schedule.expire(repository) assert [] == [x.uuid for x in expired] - backup.scan() + repository.scan() assert [{"manual:test"}] + [{"daily"}] * 6 == [ - rev.tags for rev in backup.history + rev.tags for rev in repository.history ] - assert r.filename.with_suffix(".rev").exists() + assert r.info_filename.exists() def test_next_in_interval(clock): diff --git a/src/backy/tests/test_source.py b/src/backy/tests/test_source.py deleted file mode 100644 index 7f254801..00000000 --- a/src/backy/tests/test_source.py +++ /dev/null @@ -1,23 +0,0 @@ -from backy.backup import Backup -from backy.sources.ceph.source import CephRBD - - -def test_configure_ceph_source(tmp_path, log): - with open(str(tmp_path / "config"), "w") as f: - f.write( - """\ ---- - schedule: - daily: - interval: 1d - keep: 7 - source: - type: ceph-rbd - pool: test - image: test04 -""" - ) - backup = Backup(tmp_path, log) - assert isinstance(backup.source, CephRBD) - assert backup.source.pool == "test" - assert backup.source.image == "test04" diff --git a/src/backy/tests/test_timeout.py b/src/backy/tests/test_timeout.py deleted file mode 100644 index fdb18e44..00000000 --- a/src/backy/tests/test_timeout.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from backy.timeout import TimeOut, TimeOutError - - -def test_timeout(capsys): - timeout = TimeOut(0.05, 0.01) - while timeout.tick(): - print("tick") - assert timeout.timed_out - out, err = capsys.readouterr() - assert "tick\ntick\ntick" in out - - -def test_raise_on_timeout(capsys): - timeout = TimeOut(0.05, 0.01, raise_on_timeout=True) - with pytest.raises(TimeOutError): - while True: - timeout.tick() - print("tick") - out, err = capsys.readouterr() - assert "tick\ntick\ntick" in out diff --git a/src/backy/tests/test_utils.py b/src/backy/tests/test_utils.py index 91e04723..14c2b1ef 100644 --- a/src/backy/tests/test_utils.py +++ b/src/backy/tests/test_utils.py @@ -9,9 +9,13 @@ from backy.tests import Ellipsis from backy.utils import ( SafeFile, + TimeOut, + TimeOutError, + _fake_fallocate, copy_overwrite, files_are_equal, files_are_roughly_equal, + punch_hole, ) @@ -337,3 +341,62 @@ def test_unmocked_now_returns_time_time_float(): now = backy.utils.now() after = datetime.datetime.now(ZoneInfo("UTC")) assert before <= now <= after + + +@pytest.fixture +def testfile(tmp_path): + fn = str(tmp_path / "myfile") + with open(fn, "wb") as f: + f.write(b"\xde\xad\xbe\xef" * 32) + return fn + + +def test_punch_hole(testfile): + with open(testfile, "r+b") as f: + f.seek(0) + punch_hole(f, 2, 4) + f.seek(0) + assert f.read(8) == b"\xde\xad\x00\x00\x00\x00\xbe\xef" + + +def test_punch_hole_needs_length(testfile): + with pytest.raises(IOError): + with open(testfile, "r+b") as f: + punch_hole(f, 10, 0) + + +def test_punch_hole_needs_writable_file(testfile): + with pytest.raises(OSError): + with open(testfile, "rb") as f: + punch_hole(f, 0, 1) + + +def test_punch_hole_needs_nonnegative_offset(testfile): + with pytest.raises(OSError): + with open(testfile, "r+b") as f: + punch_hole(f, -1, 1) + + +def test_fake_fallocate_only_punches_holes(testfile): + with pytest.raises(NotImplementedError): + with open(testfile, "r+b") as f: + 
_fake_fallocate(f, 0, 0, 10) + + +def test_timeout(capsys): + timeout = TimeOut(0.05, 0.01) + while timeout.tick(): + print("tick") + assert timeout.timed_out + out, err = capsys.readouterr() + assert "tick\ntick\ntick" in out + + +def test_raise_on_timeout(capsys): + timeout = TimeOut(0.05, 0.01, raise_on_timeout=True) + with pytest.raises(TimeOutError): + while True: + timeout.tick() + print("tick") + out, err = capsys.readouterr() + assert "tick\ntick\ntick" in out diff --git a/src/backy/timeout.py b/src/backy/timeout.py deleted file mode 100644 index 12dfe263..00000000 --- a/src/backy/timeout.py +++ /dev/null @@ -1,51 +0,0 @@ -# Vendored from fc.qemu. - -import time - - -class TimeOutError(RuntimeError): - pass - - -class TimeOut(object): - def __init__(self, timeout, interval=1, raise_on_timeout=False): - """Creates a timeout controller. - - TimeOut is typically used in a while loop to retry a command - for a while, e.g. for polling. Example:: - - timeout = TimeOut() - while timeout.tick(): - do_something - """ - - self.remaining = timeout - self.cutoff = time.time() + timeout - self.interval = interval - self.timed_out = False - self.first = True - self.raise_on_timeout = raise_on_timeout - - def tick(self): - """Perform a `tick` for this timeout. - - Returns True if we should keep going or False if not. - - Instead of returning False this can raise an exception - if raise_on_timeout is set. - """ - - self.remaining = self.cutoff - time.time() - self.timed_out = self.remaining <= 0 - - if self.timed_out: - if self.raise_on_timeout: - raise TimeOutError() - else: - return False - - if self.first: - self.first = False - else: - time.sleep(self.interval) - return True diff --git a/src/backy/utils.py b/src/backy/utils.py index 90357ac9..d4c8fa2e 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -1,6 +1,8 @@ import asyncio import base64 import contextlib +import ctypes +import ctypes.util import datetime import hashlib import mmap @@ -12,8 +14,9 @@ import time import typing from asyncio import Event -from os import DirEntry -from typing import IO, Callable, Iterable, List, Literal, Optional, TypeVar +from json import JSONEncoder +from pathlib import Path +from typing import IO, Any, Callable, Iterable, List, Literal, Optional, TypeVar from zoneinfo import ZoneInfo import aiofiles.os as aos @@ -22,7 +25,6 @@ import tzlocal from .ext_deps import CP -from .fallocate import punch_hole _T = TypeVar("_T") _U = TypeVar("_U") @@ -508,29 +510,152 @@ def duplicates(a: List[_T], b: List[_T]) -> List[_T]: return unique(i for i in a if i in b) -def list_rindex(l: List[_T], v: _T) -> int: - return len(l) - l[-1::-1].index(v) - 1 +def list_rindex(L: List[_T], v: _T) -> int: + return len(L) - L[-1::-1].index(v) - 1 @typing.overload -def list_get(l: List[_T], i: int) -> _T | None: +def list_get(L: List[_T], i: int) -> _T | None: ... @typing.overload -def list_get(l: List[_T], i: int, default: _U) -> _T | _U: +def list_get(L: List[_T], i: int, default: _U) -> _T | _U: ... 
-def list_get(l, i, default=None):
-    return l[i] if -len(l) <= i < len(l) else default
+def list_get(L, i, default=None):
+    return L[i] if -len(L) <= i < len(L) else default
 
 
-def list_split(l: List[_T], v: _T) -> List[List[_T]]:
+def list_split(L: List[_T], v: _T) -> List[List[_T]]:
     res: List[List[_T]] = [[]]
-    for i in l:
+    for i in L:
         if i == v:
             res.append([])
         else:
             res[-1].append(i)
     return res
+
+
+class TimeOutError(RuntimeError):
+    pass
+
+
+class TimeOut(object):
+    def __init__(self, timeout, interval=1, raise_on_timeout=False):
+        """Creates a timeout controller.
+
+        TimeOut is typically used in a while loop to retry a command
+        for a while, e.g. for polling. Example::
+
+            timeout = TimeOut()
+            while timeout.tick():
+                do_something
+        """
+
+        self.remaining = timeout
+        self.cutoff = time.time() + timeout
+        self.interval = interval
+        self.timed_out = False
+        self.first = True
+        self.raise_on_timeout = raise_on_timeout
+
+    def tick(self):
+        """Perform a `tick` for this timeout.
+
+        Returns True if we should keep going or False if not.
+
+        Instead of returning False this can raise an exception
+        if raise_on_timeout is set.
+        """
+
+        self.remaining = self.cutoff - time.time()
+        self.timed_out = self.remaining <= 0
+
+        if self.timed_out:
+            if self.raise_on_timeout:
+                raise TimeOutError()
+            else:
+                return False
+
+        if self.first:
+            self.first = False
+        else:
+            time.sleep(self.interval)
+        return True
+
+
+# Adapted from
+# https://github.com/trbs/fallocate/issues/4
+
+
+log = structlog.stdlib.get_logger()
+
+FALLOC_FL_KEEP_SIZE = 0x01
+FALLOC_FL_PUNCH_HOLE = 0x02
+
+
+def _fake_fallocate(fd, mode, offset, len_):
+    log.debug("fallocate-non-hole-punching")
+    if len_ <= 0:
+        raise IOError("fallocate: length must be positive")
+    if mode & FALLOC_FL_PUNCH_HOLE:
+        old = fd.tell()
+        fd.seek(offset)
+        fd.write(b"\x00" * len_)
+        fd.seek(old)
+    else:
+        raise NotImplementedError(
+            "fake fallocate() supports only hole punching"
+        )
+
+
+def _make_fallocate():
+    libc_name = ctypes.util.find_library("c")
+    libc = ctypes.CDLL(libc_name, use_errno=True)
+    _fallocate = libc.fallocate
+    c_off_t = ctypes.c_size_t
+    _fallocate.restype = ctypes.c_int
+    _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t]
+
+    def fallocate(fd, mode, offset, len_):
+        if len_ <= 0:
+            raise IOError("fallocate: length must be positive")
+        res = _fallocate(fd.fileno(), mode, offset, len_)
+        if res != 0:
+            errno = ctypes.get_errno()
+            raise OSError(errno, "fallocate: " + os.strerror(errno))
+
+    return fallocate
+
+
+try:
+    fallocate = _make_fallocate()
+except AttributeError:  # pragma: no cover
+    fallocate = _fake_fallocate
+
+
+def punch_hole(f, offset, len_):
+    """Ensure that the specified byte range is zeroed.
+
+    Depending on the availability of fallocate(), this is either
+    delegated to the kernel or done manually.
+    """
+    params = (f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, offset, len_)
+    try:
+        fallocate(*params)
+    except OSError:
+        _fake_fallocate(*params)
+
+
+class BackyJSONEncoder(JSONEncoder):
+    def default(self, o: Any) -> Any:
+        if hasattr(o, "to_dict"):
+            return o.to_dict()
+        elif isinstance(o, datetime.datetime):
+            return o.isoformat()
+        elif isinstance(o, Path):
+            return str(o)
+        else:
+            return super().default(o)
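# A minimal usage sketch for the helpers consolidated into backy.utils above
# (punch_hole and BackyJSONEncoder), assuming the module layout from this
# patch. ExampleRevision is a hypothetical stand-in for any object that
# exposes to_dict(); it is not part of backy itself.
import datetime
import json
import tempfile
from pathlib import Path

from backy.utils import BackyJSONEncoder, punch_hole


class ExampleRevision:
    def to_dict(self):
        return {"uuid": "123-0", "timestamp": datetime.datetime.now()}


# BackyJSONEncoder serializes objects via their to_dict() method, datetimes
# via isoformat(), and Path objects as plain strings.
print(
    json.dumps(
        {"rev": ExampleRevision(), "dir": Path("/srv/backy")},
        cls=BackyJSONEncoder,
    )
)

# punch_hole() zeroes a byte range: it delegates to fallocate() where the
# kernel supports it and falls back to writing zeroes otherwise.
with tempfile.NamedTemporaryFile() as tmp:
    tmp.write(b"\xde\xad\xbe\xef" * 4)
    tmp.flush()
    with open(tmp.name, "r+b") as f:
        punch_hole(f, 2, 4)
        f.seek(0)
        assert f.read(8) == b"\xde\xad\x00\x00\x00\x00\xbe\xef"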