From dc323d3575d8e85ee9a91ba3e14017654522868a Mon Sep 17 00:00:00 2001 From: Johann Bahl Date: Wed, 6 Mar 2024 01:23:40 +0100 Subject: [PATCH] mypy: check untyped defs --- .pre-commit-config.yaml | 6 +++- src/backy/backends/__init__.py | 4 +-- src/backy/backends/chunked/__init__.py | 32 +++++++++---------- src/backy/backends/chunked/chunk.py | 7 ++-- src/backy/backends/chunked/store.py | 11 +++---- src/backy/backends/chunked/tests/test_file.py | 8 ++--- src/backy/backup.py | 16 ++++++---- src/backy/client.py | 3 +- src/backy/daemon.py | 10 +++--- src/backy/main.py | 1 + src/backy/revision.py | 4 +-- src/backy/schedule.py | 4 ++- src/backy/scheduler.py | 7 ++-- src/backy/tests/__init__.py | 2 +- src/backy/tests/test_backup.py | 2 -- src/backy/tests/test_daemon.py | 4 +-- src/backy/utils.py | 24 ++++++++------ 17 files changed, 80 insertions(+), 65 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f6df610..9eb2f4f5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,5 +32,9 @@ repos: - types-PyYAML==5.4.0 - types-setuptools - types-tzlocal==4.2 + exclude: tests + args: + - --check-untyped-defs + - --ignore-missing-imports repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.0.1' + rev: 'v1.8.0' diff --git a/src/backy/backends/__init__.py b/src/backy/backends/__init__.py index 02161f78..e25b66bb 100644 --- a/src/backy/backends/__init__.py +++ b/src/backy/backends/__init__.py @@ -27,5 +27,5 @@ def purge(self) -> None: def verify(self) -> None: pass - def scrub(self, backup: "Backup", type: str) -> None: - pass + def scrub(self, backup: "Backup", type: str) -> int: + return 0 diff --git a/src/backy/backends/chunked/__init__.py b/src/backy/backends/chunked/__init__.py index 05934afb..595f32dc 100644 --- a/src/backy/backends/chunked/__init__.py +++ b/src/backy/backends/chunked/__init__.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import IO, cast +from typing import Set from structlog.stdlib import BoundLogger @@ -24,13 +24,13 @@ class ChunkedFileBackend(BackyBackend): def __init__(self, revision: Revision, log: BoundLogger): self.backup = revision.backup self.revision = revision - path = self.revision.backup.path / "chunks" + path = self.backup.path / "chunks" if path not in self.STORES: - self.STORES[path] = Store(self.revision.backup.path / "chunks", log) + self.STORES[path] = Store(self.backup.path / "chunks", log) self.store = self.STORES[path] self.log = log.bind(subsystem="chunked") - def open(self, mode: str = "rb") -> IO: + def open(self, mode: str = "rb") -> File: # type: ignore[override] if "w" in mode or "+" in mode and self.clone_parent: parent = self.revision.get_parent() if parent and not self.revision.filename.exists(): @@ -50,32 +50,32 @@ def open(self, mode: str = "rb") -> IO: self.log.warn("forcing-full") self.store.force_writes = True - return cast(IO, file) + return file - def purge(self): + def purge(self) -> None: self.log.debug("purge") - self.store.users = [] + used_chunks: Set[str] = set() for revision in self.backup.history: try: - self.store.users.append( - self.backup.backend_factory(revision, self.log).open() + used_chunks |= set( + type(self)(revision, self.log).open()._mapping.values() ) except ValueError: # Invalid format, like purging non-chunked with chunked backend pass - self.store.purge() + self.store.purge(used_chunks) @report_status def verify(self): log = self.log.bind(revision_uuid=self.revision.uuid) log.info("verify-start") - verified_chunks = set() + verified_chunks: Set[str] = set() # Load verified chunks to avoid duplicate work for revision in self.backup.clean_history: if revision.trust != Trust.VERIFIED: continue - f = self.backup.backend_factory(revision, log).open() + f = type(self)(revision, log).open() verified_chunks.update(f._mapping.values()) log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks)) @@ -125,7 +125,7 @@ def verify(self): yield END yield None - def scrub(self, backup, type): + def scrub(self, backup, type: str) -> int: if type == "light": return self.scrub_light(backup) elif type == "deep": @@ -133,12 +133,12 @@ def scrub(self, backup, type): else: raise RuntimeError("Invalid scrubbing type {}".format(type)) - def scrub_light(self, backup): + def scrub_light(self, backup) -> int: errors = 0 self.log.info("scrub-light") for revision in backup.history: self.log.info("scrub-light-rev", revision_uuid=revision.uuid) - backend = backup.backend_factory(revision, self.log).open() + backend = type(self)(revision, self.log).open() for hash in backend._mapping.values(): if backend.store.chunk_path(hash).exists(): continue @@ -150,7 +150,7 @@ def scrub_light(self, backup): errors += 1 return errors - def scrub_deep(self, backup): + def scrub_deep(self, backup) -> int: errors = self.scrub_light(backup) self.log.info("scrub-deep") errors += self.store.validate_chunks() diff --git a/src/backy/backends/chunked/chunk.py b/src/backy/backends/chunked/chunk.py index e2a343ca..f2c677f2 100644 --- a/src/backy/backends/chunked/chunk.py +++ b/src/backy/backends/chunked/chunk.py @@ -75,7 +75,7 @@ def _read_existing(self): chunk_file = self.store.chunk_path(self.hash) try: with open(chunk_file, "rb") as f: - posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) + posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore data = f.read() data = lzo.decompress(data) except (lzo.error, IOError) as e: @@ -97,6 +97,7 @@ def read(self, offset, size=-1): Return the data and the remaining size that should be read. """ self._read_existing() + assert self.data is not None self._touch() self.data.seek(offset) @@ -124,6 +125,7 @@ def write(self, offset, data): chunk_stats["write_full"] += 1 else: self._read_existing() + assert self.data is not None self.data.seek(offset) self.data.write(data) chunk_stats["write_partial"] += 1 @@ -145,7 +147,7 @@ def flush(self): # of our change - avoid renaming between multiple directories to # reduce traffic on the directory nodes. fd, tmpfile_name = tempfile.mkstemp(dir=target.parent) - posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED) + posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore with os.fdopen(fd, mode="wb") as f: data = lzo.compress(self.data.getvalue()) f.write(data) @@ -160,6 +162,7 @@ def flush(self): def _update_hash(self): # I'm not using read() here to a) avoid cache accounting and b) # use a faster path to get the data. + assert self.data is not None data = self.data.getvalue() self.hash = hash(data) self.file._mapping[self.id] = self.hash diff --git a/src/backy/backends/chunked/store.py b/src/backy/backends/chunked/store.py index a95de3fe..a8e4aad2 100644 --- a/src/backy/backends/chunked/store.py +++ b/src/backy/backends/chunked/store.py @@ -1,11 +1,12 @@ from pathlib import Path +from typing import Set import lzo from structlog.stdlib import BoundLogger from backy.utils import END, report_status -from . import File, chunk +from . import chunk # A chunkstore, is responsible for all revisions for a single backup, for now. # We can start having statistics later how much reuse between images is @@ -24,14 +25,12 @@ class Store(object): force_writes = False path: Path - users: list[File] seen_forced: set[str] known: set[str] log: BoundLogger def __init__(self, path: Path, log): self.path = path - self.users = [] self.seen_forced = set() self.known = set() self.log = log.bind(subsystem="chunked-store") @@ -91,12 +90,10 @@ def ls(self): hash = file.name.removesuffix(".chunk.lzo") yield file, hash, lambda f: lzo.decompress(open(f, "rb").read()) - def purge(self) -> None: + def purge(self, used_chunks: Set[str]) -> None: # This assumes exclusive lock on the store. This is guaranteed by # backy's main locking. - to_delete = self.known.copy() - for user in self.users: - to_delete = to_delete - set(user._mapping.values()) + to_delete = self.known - used_chunks self.log.info("purge", purging=len(to_delete)) for file_hash in sorted(to_delete): self.chunk_path(file_hash).unlink(missing_ok=True) diff --git a/src/backy/backends/chunked/tests/test_file.py b/src/backy/backends/chunked/tests/test_file.py index 8854627f..204bfd38 100644 --- a/src/backy/backends/chunked/tests/test_file.py +++ b/src/backy/backends/chunked/tests/test_file.py @@ -116,7 +116,7 @@ def test_file_seek(tmp_path, log): assert f.tell() == 10 f.close() f = File(tmp_path / "asdf", store, mode="r") - f.read() == b"asdf\x00\x00\x00\x00\x00\x00" + assert f.read() == b"asdf\x00\x00\x00\x00\x00\x00" f.close() @@ -151,8 +151,7 @@ def test_continuously_updated_file(tmp_path, log): store.validate_chunks() - store.users.append(f) - store.purge() + store.purge(set(f._mapping.values())) sample.close() f.close() @@ -192,8 +191,7 @@ def test_seeky_updated_file(tmp_path, log): store.validate_chunks() - store.users.append(f) - store.purge() + store.purge(set(f._mapping.values())) sample.close() f.close() diff --git a/src/backy/backup.py b/src/backy/backup.py index 896c8a42..262251ae 100644 --- a/src/backy/backup.py +++ b/src/backy/backup.py @@ -7,7 +7,7 @@ from enum import Enum from math import ceil, floor from pathlib import Path -from typing import IO, List, Optional, Type +from typing import IO, List, Literal, Optional, Type import tzlocal import yaml @@ -111,7 +111,7 @@ class Backup(object): config: dict schedule: Schedule source: BackySourceFactory - backend_type: str + backend_type: Literal["cowfile", "chunked"] backend_factory: Type[BackyBackend] history: list[Revision] quarantine: QuarantineStore @@ -380,7 +380,7 @@ def restore_file(self, source, target): open(target, "ab").close() # touch into existence with open(target, "r+b", buffering=CHUNK_SIZE) as target: try: - posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) + posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) # type: ignore except Exception: pass copy(source, target) @@ -390,7 +390,7 @@ def restore_stdout(self, source): """Emit restore data to stdout (for pipe processing).""" self.log.debug("restore-stdout", source=source.name) try: - posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) + posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) # type: ignore except Exception: pass with os.fdopen(os.dup(1), "wb") as target: @@ -414,11 +414,11 @@ def upgrade(self): from backy.backends.chunked import ChunkedFileBackend from backy.sources.file import File - last_worklist = [] + last_worklist: List[Revision] = [] while True: self.scan() - to_upgrade = [ + to_upgrade: List[Revision] = [ r for r in self.clean_history if r.backend_type == "cowfile" ] if not to_upgrade: @@ -442,7 +442,9 @@ def upgrade(self): revision_uuid=revision.uuid, timestamp=revision.timestamp, ) - original_file = revision.filename + ".old" + original_file = revision.filename.with_suffix( + revision.filename.suffix + ".old" + ) if not os.path.exists(original_file): # We may be resuming a partial upgrade. Only move the file # if our .old doesn't exist. diff --git a/src/backy/client.py b/src/backy/client.py index 2004f18b..485c1ccb 100644 --- a/src/backy/client.py +++ b/src/backy/client.py @@ -1,6 +1,7 @@ import datetime import sys from asyncio import get_running_loop +from typing import Dict import aiohttp import humanize @@ -149,7 +150,7 @@ async def jobs(self, filter_re=""): async def status(self): """Show job status overview""" t = Table("Status", "#") - state_summary = {} + state_summary: Dict[str, int] = {} jobs = await self.api.get_jobs() for job in jobs: state_summary.setdefault(job["status"], 0) diff --git a/src/backy/daemon.py b/src/backy/daemon.py index cfb62898..40a91882 100644 --- a/src/backy/daemon.py +++ b/src/backy/daemon.py @@ -128,7 +128,7 @@ def _apply_config(self): if ( not self.backup_semaphores - or self.backup_semaphores["slow"]._bound_value != self.worker_limit + or self.backup_semaphores["slow"]._bound_value != self.worker_limit # type: ignore ): # Adjusting the settings of a semaphore is not obviously simple. So # here is a simplified version with hopefully clear semantics: @@ -187,6 +187,7 @@ def handle_signals(signum): loop.add_signal_handler(sig, handle_signals, sig) def run_forever(self): + assert self.loop self.log.info("starting-loop") self.loop.run_forever() self.loop.close() @@ -227,20 +228,21 @@ async def api_server_loop(self): def terminate(self): self.log.info("terminating") for task in asyncio.all_tasks(): - if task.get_coro().__name__ == "async_finalizer": + if task.get_coro().__name__ == "async_finalizer": # type: ignore # Support pytest-asyncio integration. continue - if task.get_coro().__name__.startswith("test_"): + if task.get_coro().__name__.startswith("test_"): # type: ignore # Support pytest-asyncio integration. continue self.log.debug( "cancelling-task", name=task.get_name(), - coro_name=task.get_coro().__name__, + coro_name=task.get_coro().__name__, # type: ignore ) task.cancel() async def shutdown_loop(self): + assert self.loop self.log.debug("waiting-shutdown") while True: try: diff --git a/src/backy/main.py b/src/backy/main.py index 0066595b..18dd7e1e 100644 --- a/src/backy/main.py +++ b/src/backy/main.py @@ -422,6 +422,7 @@ def main(): if not hasattr(args, "logfile"): args.logfile = None + default_logfile: Optional[Path] match args.func: case "scheduler": default_logfile = Path("/var/log/backy.log") diff --git a/src/backy/revision.py b/src/backy/revision.py index 88ca1c24..d19cf9bd 100644 --- a/src/backy/revision.py +++ b/src/backy/revision.py @@ -1,7 +1,7 @@ import datetime from enum import Enum from pathlib import Path -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Literal, Optional import shortuuid import yaml @@ -38,7 +38,7 @@ class Revision(object): stats: dict tags: set[str] trust: Trust = Trust.TRUSTED - backend_type: str = "chunked" + backend_type: Literal["cowfile", "chunked"] = "chunked" log: BoundLogger def __init__( diff --git a/src/backy/schedule.py b/src/backy/schedule.py index 368bc0dc..244712b5 100644 --- a/src/backy/schedule.py +++ b/src/backy/schedule.py @@ -1,5 +1,7 @@ import copy +import datetime from datetime import timedelta +from typing import Dict import backy.utils from backy.revision import filter_schedule_tags @@ -80,7 +82,7 @@ def next(self, relative, spread, archive): return time, tags def _next_ideal(self, relative, spread): - next_times = {} + next_times: Dict[datetime.datetime, set] = {} for tag, settings in self.schedule.items(): t = next_times.setdefault( next_in_interval(relative, settings["interval"], spread), set() diff --git a/src/backy/scheduler.py b/src/backy/scheduler.py index 87fce6cb..4a0455a5 100644 --- a/src/backy/scheduler.py +++ b/src/backy/scheduler.py @@ -27,9 +27,9 @@ class Job(object): status: str = "" next_time: Optional[datetime.datetime] = None next_tags: Optional[set[str]] = None - path: Optional[Path] = None - backup: Optional[Backup] = None - logfile: Optional[Path] = None + path: Path + backup: Backup + logfile: Path last_config: Optional[dict] = None daemon: "backy.daemon.BackyDaemon" run_immediately: asyncio.Event @@ -341,6 +341,7 @@ async def run_callback(self): def start(self): assert self._task is None + assert self.daemon.loop self._task = self.daemon.loop.create_task( self.run_forever(), name=f"backup-loop-{self.name}" ) diff --git a/src/backy/tests/__init__.py b/src/backy/tests/__init__.py index eef3ef5b..8a8dfc63 100644 --- a/src/backy/tests/__init__.py +++ b/src/backy/tests/__init__.py @@ -28,7 +28,7 @@ def diff(self): result.append("- " + pattern) else: diffed = d.compare([pattern], [line]) - diffed = [x.rstrip("\n") for x in diffed] + diffed = (x.rstrip("\n") for x in diffed) result.extend(diffed) result = list(filter(str.strip, result)) return result diff --git a/src/backy/tests/test_backup.py b/src/backy/tests/test_backup.py index 1ed097d6..32f86402 100644 --- a/src/backy/tests/test_backup.py +++ b/src/backy/tests/test_backup.py @@ -3,10 +3,8 @@ from unittest import mock import pytest -import yaml import backy.utils -from backy.backup import Backup from backy.revision import Revision from backy.sources.file import File from backy.utils import CHUNK_SIZE diff --git a/src/backy/tests/test_daemon.py b/src/backy/tests/test_daemon.py index 880af14a..7ccc9f49 100644 --- a/src/backy/tests/test_daemon.py +++ b/src/backy/tests/test_daemon.py @@ -4,7 +4,6 @@ import re import signal from pathlib import Path -from unittest import mock import pytest import yaml @@ -108,6 +107,8 @@ def test_reload(daemon, tmp_path): async def test_sighup(daemon, log, monkeypatch): """test that a `SIGHUP` causes a reload without interrupting other tasks""" + all_tasks = set() + def reload(): nonlocal all_tasks all_tasks = asyncio.all_tasks() @@ -116,7 +117,6 @@ def reload(): async def send_sighup(): os.kill(os.getpid(), signal.SIGHUP) - all_tasks = [] reloaded = asyncio.Event() monkeypatch.setattr(daemon, "reload", reload) diff --git a/src/backy/utils.py b/src/backy/utils.py index 314f6681..4b851989 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -11,7 +11,7 @@ import time import typing from pathlib import Path -from typing import IO, Callable, Iterable, List, TypeVar +from typing import IO, Callable, Iterable, List, Optional, TypeVar from zoneinfo import ZoneInfo import humanize @@ -92,6 +92,7 @@ class SafeFile(object): """ protected_mode = 0o440 + f: Optional[IO] def __init__(self, filename: str | os.PathLike, encoding=None, sync=True): self.filename = filename @@ -119,7 +120,7 @@ def __exit__(self, exc_type=None, exc_value=None, exc_tb=None): os.fsync(self.f) self.f.close() - if self.use_write_protection: + if self.write_protected: os.chmod(self.f.name, self.protected_mode) if os.path.exists(self.filename): os.chmod(self.filename, self.protected_mode) @@ -136,7 +137,7 @@ def __exit__(self, exc_type=None, exc_value=None, exc_tb=None): def open_new(self, mode): """Open this as a new (temporary) file that will be renamed on close.""" - assert not self.f + assert self.f is None self.f = tempfile.NamedTemporaryFile( mode, dir=os.path.dirname(self.filename), delete=False ) @@ -144,19 +145,17 @@ def open_new(self, mode): def open_copy(self, mode): """Open an existing file, make a copy first, rename on close.""" - assert not self.f - + self.open_new("wb") + assert self.f is not None if os.path.exists(self.filename): - self.open_new("wb") cp_reflink(self.filename, self.f.name) - self.f.close() - + self.f.close() self.f = open(self.f.name, mode) def open_inplace(self, mode): """Open as an existing file, in-place.""" assert not self.f - if self.use_write_protection and os.path.exists(self.filename): + if self.write_protected and os.path.exists(self.filename): # This is kinda dangerous, but this is a tool that only protects # you so far and doesn't try to get in your way if you really need # to do this. @@ -177,29 +176,36 @@ def use_write_protection(self): @property def name(self): + assert self.f is not None return self.f.name def read(self, *args, **kw): + assert self.f is not None data = self.f.read(*args, **kw) if self.encoding: data = data.decode(self.encoding) return data def write(self, data): + assert self.f is not None if self.encoding: data = data.encode(self.encoding) self.f.write(data) def seek(self, offset, whence=0): + assert self.f is not None return self.f.seek(offset, whence) def tell(self): + assert self.f is not None return self.f.tell() def truncate(self, size=None): + assert self.f is not None return self.f.truncate(size) def fileno(self): + assert self.f is not None return self.f.fileno()