mypy: check untyped defs
Johann Bahl committed Mar 6, 2024
1 parent f4ed3da commit dc323d3
Showing 17 changed files with 80 additions and 65 deletions.
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
@@ -32,5 +32,9 @@ repos:
           - types-PyYAML==5.4.0
           - types-setuptools
           - types-tzlocal==4.2
+        exclude: tests
+        args:
+          - --check-untyped-defs
+          - --ignore-missing-imports
     repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v1.0.1'
+    rev: 'v1.8.0'
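
For context: the new --check-untyped-defs flag makes mypy analyze the bodies of functions that have no type annotations, which it otherwise skips entirely; that is what surfaces most of the fixes below. A minimal sketch (illustration only, not from this repo):

    # Without annotations, mypy normally skips this whole body.
    # With --check-untyped-defs it is analyzed and the bug is reported:
    def banner(title):
        pad = "=" * "3"  # error: Unsupported operand types for * ("str" and "str")
        return pad + title + pad

--ignore-missing-imports complements it by silencing import errors for third-party packages (such as lzo) that ship no type stubs.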
4 changes: 2 additions & 2 deletions src/backy/backends/__init__.py
@@ -27,5 +27,5 @@ def purge(self) -> None:
     def verify(self) -> None:
         pass
 
-    def scrub(self, backup: "Backup", type: str) -> None:
-        pass
+    def scrub(self, backup: "Backup", type: str) -> int:
+        return 0
32 changes: 16 additions & 16 deletions src/backy/backends/chunked/__init__.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import IO, cast
+from typing import Set
 
 from structlog.stdlib import BoundLogger
 
@@ -24,13 +24,13 @@ class ChunkedFileBackend(BackyBackend):
     def __init__(self, revision: Revision, log: BoundLogger):
         self.backup = revision.backup
         self.revision = revision
-        path = self.revision.backup.path / "chunks"
+        path = self.backup.path / "chunks"
         if path not in self.STORES:
-            self.STORES[path] = Store(self.revision.backup.path / "chunks", log)
+            self.STORES[path] = Store(self.backup.path / "chunks", log)
         self.store = self.STORES[path]
         self.log = log.bind(subsystem="chunked")
 
-    def open(self, mode: str = "rb") -> IO:
+    def open(self, mode: str = "rb") -> File:  # type: ignore[override]
         if "w" in mode or "+" in mode and self.clone_parent:
             parent = self.revision.get_parent()
             if parent and not self.revision.filename.exists():
@@ -50,32 +50,32 @@ def open(self, mode: str = "rb") -> IO:
             self.log.warn("forcing-full")
             self.store.force_writes = True
 
-        return cast(IO, file)
+        return file
 
-    def purge(self):
+    def purge(self) -> None:
         self.log.debug("purge")
-        self.store.users = []
+        used_chunks: Set[str] = set()
         for revision in self.backup.history:
             try:
-                self.store.users.append(
-                    self.backup.backend_factory(revision, self.log).open()
+                used_chunks |= set(
+                    type(self)(revision, self.log).open()._mapping.values()
                 )
             except ValueError:
                 # Invalid format, like purging non-chunked with chunked backend
                 pass
-        self.store.purge()
+        self.store.purge(used_chunks)
 
     @report_status
     def verify(self):
         log = self.log.bind(revision_uuid=self.revision.uuid)
         log.info("verify-start")
-        verified_chunks = set()
+        verified_chunks: Set[str] = set()
 
         # Load verified chunks to avoid duplicate work
         for revision in self.backup.clean_history:
             if revision.trust != Trust.VERIFIED:
                 continue
-            f = self.backup.backend_factory(revision, log).open()
+            f = type(self)(revision, log).open()
             verified_chunks.update(f._mapping.values())
 
         log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks))
@@ -125,20 +125,20 @@ def verify(self):
         yield END
         yield None
 
-    def scrub(self, backup, type):
+    def scrub(self, backup, type: str) -> int:
         if type == "light":
             return self.scrub_light(backup)
         elif type == "deep":
             return self.scrub_deep(backup)
         else:
             raise RuntimeError("Invalid scrubbing type {}".format(type))
 
-    def scrub_light(self, backup):
+    def scrub_light(self, backup) -> int:
         errors = 0
         self.log.info("scrub-light")
         for revision in backup.history:
             self.log.info("scrub-light-rev", revision_uuid=revision.uuid)
-            backend = backup.backend_factory(revision, self.log).open()
+            backend = type(self)(revision, self.log).open()
             for hash in backend._mapping.values():
                 if backend.store.chunk_path(hash).exists():
                     continue
@@ -150,7 +150,7 @@ def scrub_light(self, backup):
             errors += 1
         return errors
 
-    def scrub_deep(self, backup):
+    def scrub_deep(self, backup) -> int:
         errors = self.scrub_light(backup)
         self.log.info("scrub-deep")
         errors += self.store.validate_chunks()
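
A note on the recurring type(self)(revision, self.log) pattern: the old backend_factory attribute is typed against the abstract BackyBackend (see backup.py below), whose open() does not expose the chunked File and its _mapping, while constructing through type(self) keeps the concrete class visible to mypy. A condensed, hypothetical sketch of the idea:

    from typing import Dict, Set

    class ChunkedFile:
        def __init__(self) -> None:
            self._mapping: Dict[int, str] = {}

    class ChunkedBackend:
        def open(self) -> ChunkedFile:
            return ChunkedFile()

        def used_hashes(self) -> Set[str]:
            # type(self) is the concrete backend class here, so .open()
            # is known to return ChunkedFile and ._mapping type-checks.
            return set(type(self)().open()._mapping.values())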
7 changes: 5 additions & 2 deletions src/backy/backends/chunked/chunk.py
@@ -75,7 +75,7 @@ def _read_existing(self):
         chunk_file = self.store.chunk_path(self.hash)
         try:
             with open(chunk_file, "rb") as f:
-                posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
+                posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)  # type: ignore
                 data = f.read()
                 data = lzo.decompress(data)
         except (lzo.error, IOError) as e:
@@ -97,6 +97,7 @@ def read(self, offset, size=-1):
         Return the data and the remaining size that should be read.
         """
         self._read_existing()
+        assert self.data is not None
         self._touch()
 
         self.data.seek(offset)
@@ -124,6 +125,7 @@ def write(self, offset, data):
             chunk_stats["write_full"] += 1
         else:
             self._read_existing()
+            assert self.data is not None
             self.data.seek(offset)
             self.data.write(data)
             chunk_stats["write_partial"] += 1
@@ -145,7 +147,7 @@ def flush(self):
         # of our change - avoid renaming between multiple directories to
         # reduce traffic on the directory nodes.
         fd, tmpfile_name = tempfile.mkstemp(dir=target.parent)
-        posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
+        posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)  # type: ignore
         with os.fdopen(fd, mode="wb") as f:
             data = lzo.compress(self.data.getvalue())
             f.write(data)
@@ -160,6 +162,7 @@ def flush(self):
     def _update_hash(self):
         # I'm not using read() here to a) avoid cache accounting and b)
         # use a faster path to get the data.
+        assert self.data is not None
         data = self.data.getvalue()
         self.hash = hash(data)
         self.file._mapping[self.id] = self.hash
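
The repeated assert self.data is not None additions are the idiomatic way to narrow an Optional attribute: mypy cannot see that _read_existing() always populates self.data, so the assert both tells the checker and documents that the value is non-None from here on. A minimal sketch, assuming an Optional[BytesIO] attribute:

    import io
    from typing import Optional

    class Chunk:
        data: Optional[io.BytesIO] = None

        def _read_existing(self) -> None:
            self.data = io.BytesIO(b"payload")

        def read_all(self) -> bytes:
            self._read_existing()
            # Narrow Optional[BytesIO] to BytesIO for the checker:
            assert self.data is not None
            return self.data.getvalue()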
11 changes: 4 additions & 7 deletions src/backy/backends/chunked/store.py
@@ -1,11 +1,12 @@
 from pathlib import Path
+from typing import Set
 
 import lzo
 from structlog.stdlib import BoundLogger
 
 from backy.utils import END, report_status
 
-from . import File, chunk
+from . import chunk
 
 # A chunkstore, is responsible for all revisions for a single backup, for now.
 # We can start having statistics later how much reuse between images is
@@ -24,14 +25,12 @@ class Store(object):
     force_writes = False
 
     path: Path
-    users: list[File]
     seen_forced: set[str]
     known: set[str]
     log: BoundLogger
 
     def __init__(self, path: Path, log):
         self.path = path
-        self.users = []
         self.seen_forced = set()
         self.known = set()
         self.log = log.bind(subsystem="chunked-store")
@@ -91,12 +90,10 @@ def ls(self):
             hash = file.name.removesuffix(".chunk.lzo")
             yield file, hash, lambda f: lzo.decompress(open(f, "rb").read())
 
-    def purge(self) -> None:
+    def purge(self, used_chunks: Set[str]) -> None:
         # This assumes exclusive lock on the store. This is guaranteed by
         # backy's main locking.
-        to_delete = self.known.copy()
-        for user in self.users:
-            to_delete = to_delete - set(user._mapping.values())
+        to_delete = self.known - used_chunks
         self.log.info("purge", purging=len(to_delete))
         for file_hash in sorted(to_delete):
             self.chunk_path(file_hash).unlink(missing_ok=True)
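
The purge() refactor replaces hidden mutable state (the store tracking its open File users) with an explicit mark-and-sweep contract: the caller collects every chunk hash still referenced by any revision and passes the set in, and the store deletes the difference. A standalone sketch (hypothetical names, assuming a flat chunk layout):

    from pathlib import Path
    from typing import Set

    def purge(store_path: Path, known: Set[str], used_chunks: Set[str]) -> None:
        # Sweep: anything on disk (known) that no revision references.
        for file_hash in sorted(known - used_chunks):
            (store_path / f"{file_hash}.chunk.lzo").unlink(missing_ok=True)

This also lets store.py drop its File import and users list, decoupling the store from its consumers.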
8 changes: 3 additions & 5 deletions src/backy/backends/chunked/tests/test_file.py
@@ -116,7 +116,7 @@ def test_file_seek(tmp_path, log):
     assert f.tell() == 10
     f.close()
     f = File(tmp_path / "asdf", store, mode="r")
-    f.read() == b"asdf\x00\x00\x00\x00\x00\x00"
+    assert f.read() == b"asdf\x00\x00\x00\x00\x00\x00"
 
     f.close()
 
@@ -151,8 +151,7 @@ def test_continuously_updated_file(tmp_path, log):
 
     store.validate_chunks()
 
-    store.users.append(f)
-    store.purge()
+    store.purge(set(f._mapping.values()))
 
     sample.close()
     f.close()
@@ -192,8 +191,7 @@ def test_seeky_updated_file(tmp_path, log):
 
     store.validate_chunks()
 
-    store.users.append(f)
-    store.purge()
+    store.purge(set(f._mapping.values()))
 
     sample.close()
     f.close()
16 changes: 9 additions & 7 deletions src/backy/backup.py
@@ -7,7 +7,7 @@
 from enum import Enum
 from math import ceil, floor
 from pathlib import Path
-from typing import IO, List, Optional, Type
+from typing import IO, List, Literal, Optional, Type
 
 import tzlocal
 import yaml
@@ -111,7 +111,7 @@ class Backup(object):
     config: dict
     schedule: Schedule
     source: BackySourceFactory
-    backend_type: str
+    backend_type: Literal["cowfile", "chunked"]
     backend_factory: Type[BackyBackend]
     history: list[Revision]
     quarantine: QuarantineStore
@@ -380,7 +380,7 @@ def restore_file(self, source, target):
         open(target, "ab").close()  # touch into existence
         with open(target, "r+b", buffering=CHUNK_SIZE) as target:
             try:
-                posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
+                posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)  # type: ignore
             except Exception:
                 pass
             copy(source, target)
@@ -390,7 +390,7 @@ def restore_stdout(self, source):
         """Emit restore data to stdout (for pipe processing)."""
         self.log.debug("restore-stdout", source=source.name)
         try:
-            posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)
+            posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)  # type: ignore
         except Exception:
             pass
         with os.fdopen(os.dup(1), "wb") as target:
@@ -414,11 +414,11 @@ def upgrade(self):
         from backy.backends.chunked import ChunkedFileBackend
         from backy.sources.file import File
 
-        last_worklist = []
+        last_worklist: List[Revision] = []
 
         while True:
            self.scan()
-            to_upgrade = [
+            to_upgrade: List[Revision] = [
                 r for r in self.clean_history if r.backend_type == "cowfile"
             ]
             if not to_upgrade:
@@ -442,7 +442,9 @@ def upgrade(self):
                 revision_uuid=revision.uuid,
                 timestamp=revision.timestamp,
             )
-            original_file = revision.filename + ".old"
+            original_file = revision.filename.with_suffix(
+                revision.filename.suffix + ".old"
+            )
             if not os.path.exists(original_file):
                 # We may be resuming a partial upgrade. Only move the file
                 # if our .old doesn't exist.
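
The original_file change is a genuine bug that the stricter checking surfaced: revision.filename is a pathlib.Path, and Path + str raises TypeError at runtime. Appending to the existing suffix preserves the original extension:

    from pathlib import Path

    p = Path("backups/rev-123.rev")
    # p + ".old"             -> TypeError at runtime (Path has no +)
    # p.with_suffix(".old")  -> backups/rev-123.old (drops .rev)
    old = p.with_suffix(p.suffix + ".old")
    print(old)               # backups/rev-123.rev.old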
3 changes: 2 additions & 1 deletion src/backy/client.py
@@ -1,6 +1,7 @@
 import datetime
 import sys
 from asyncio import get_running_loop
+from typing import Dict
 
 import aiohttp
 import humanize
@@ -149,7 +150,7 @@ async def jobs(self, filter_re=""):
     async def status(self):
         """Show job status overview"""
         t = Table("Status", "#")
-        state_summary = {}
+        state_summary: Dict[str, int] = {}
         jobs = await self.api.get_jobs()
         for job in jobs:
             state_summary.setdefault(job["status"], 0)
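
A bare state_summary = {} gives mypy nothing to infer element types from (it cannot resolve the partial dict type from the setdefault call that follows), so it asks for an annotation. The explicit Dict[str, int] resolves that and lets the increments type-check:

    from typing import Dict

    state_summary: Dict[str, int] = {}  # bare {} -> error: Need type annotation
    state_summary.setdefault("running", 0)
    state_summary["running"] += 1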
10 changes: 6 additions & 4 deletions src/backy/daemon.py
@@ -128,7 +128,7 @@ def _apply_config(self):
 
         if (
             not self.backup_semaphores
-            or self.backup_semaphores["slow"]._bound_value != self.worker_limit
+            or self.backup_semaphores["slow"]._bound_value != self.worker_limit  # type: ignore
         ):
             # Adjusting the settings of a semaphore is not obviously simple. So
             # here is a simplified version with hopefully clear semantics:
@@ -187,6 +187,7 @@ def handle_signals(signum):
             loop.add_signal_handler(sig, handle_signals, sig)
 
     def run_forever(self):
+        assert self.loop
         self.log.info("starting-loop")
         self.loop.run_forever()
         self.loop.close()
@@ -227,20 +228,21 @@ async def api_server_loop(self):
     def terminate(self):
         self.log.info("terminating")
         for task in asyncio.all_tasks():
-            if task.get_coro().__name__ == "async_finalizer":
+            if task.get_coro().__name__ == "async_finalizer":  # type: ignore
                 # Support pytest-asyncio integration.
                 continue
-            if task.get_coro().__name__.startswith("test_"):
+            if task.get_coro().__name__.startswith("test_"):  # type: ignore
                 # Support pytest-asyncio integration.
                 continue
             self.log.debug(
                 "cancelling-task",
                 name=task.get_name(),
-                coro_name=task.get_coro().__name__,
+                coro_name=task.get_coro().__name__,  # type: ignore
             )
             task.cancel()
 
     async def shutdown_loop(self):
+        assert self.loop
         self.log.debug("waiting-shutdown")
         while True:
             try:
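
Where the code relies on private attributes (Semaphore._bound_value) or on attributes the stubs cannot promise (Task.get_coro() is declared as possibly returning a plain generator, which has no __name__ as far as mypy is concerned), the commit keeps the runtime behavior and silences the checker per line. A sketch of the error-code-scoped variant, slightly stricter than the bare # type: ignore used here (the code union-attr is an assumption):

    import asyncio

    async def worker() -> None:
        await asyncio.sleep(0)

    async def main() -> None:
        task = asyncio.create_task(worker())
        # Scoped ignore: silences only the expected error, so any new
        # mistake on this line is still reported.
        print(task.get_coro().__name__)  # type: ignore[union-attr]
        await task

    asyncio.run(main())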
1 change: 1 addition & 0 deletions src/backy/main.py
@@ -422,6 +422,7 @@ def main():
     if not hasattr(args, "logfile"):
         args.logfile = None
 
+    default_logfile: Optional[Path]
     match args.func:
         case "scheduler":
             default_logfile = Path("/var/log/backy.log")
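
Declaring default_logfile: Optional[Path] before the match lets each arm assign either a Path or None; otherwise mypy infers the type from the first assignment alone and rejects the other branches. A minimal sketch (the fallback arm is assumed):

    from pathlib import Path
    from typing import Optional

    command = "scheduler"  # stand-in for args.func

    default_logfile: Optional[Path]
    match command:
        case "scheduler":
            default_logfile = Path("/var/log/backy.log")
        case _:
            default_logfile = None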
4 changes: 2 additions & 2 deletions src/backy/revision.py
@@ -1,7 +1,7 @@
 import datetime
 from enum import Enum
 from pathlib import Path
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Literal, Optional
 
 import shortuuid
 import yaml
@@ -38,7 +38,7 @@ class Revision(object):
     stats: dict
     tags: set[str]
     trust: Trust = Trust.TRUSTED
-    backend_type: str = "chunked"
+    backend_type: Literal["cowfile", "chunked"] = "chunked"
     log: BoundLogger
 
     def __init__(
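
Tightening backend_type from str to Literal["cowfile", "chunked"] (mirrored in Backup above) turns stray spellings into type errors instead of silent fall-through at runtime:

    from typing import Literal

    BackendType = Literal["cowfile", "chunked"]

    def make_backend(backend_type: BackendType) -> None: ...

    make_backend("chunked")  # OK
    make_backend("tarball")  # mypy: incompatible type, expected the Literal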
4 changes: 3 additions & 1 deletion src/backy/schedule.py
@@ -1,5 +1,7 @@
 import copy
 import datetime
+from datetime import timedelta
+from typing import Dict
 
 import backy.utils
 from backy.revision import filter_schedule_tags
@@ -80,7 +82,7 @@ def next(self, relative, spread, archive):
         return time, tags
 
     def _next_ideal(self, relative, spread):
-        next_times = {}
+        next_times: Dict[datetime.datetime, set] = {}
         for tag, settings in self.schedule.items():
             t = next_times.setdefault(
                 next_in_interval(relative, settings["interval"], spread), set()