diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f6df610..3557a417 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,5 +32,6 @@ repos: - types-PyYAML==5.4.0 - types-setuptools - types-tzlocal==4.2 + - types-aiofiles==23.2 repo: https://github.com/pre-commit/mirrors-mypy rev: 'v1.0.1' diff --git a/lib.nix b/lib.nix index 890d9754..354a4c8c 100644 --- a/lib.nix +++ b/lib.nix @@ -39,6 +39,9 @@ let # replace poetry to avoid dependency on vulnerable python-cryptography package nativeBuildInputs = [ super.poetry-core ] ++ builtins.filter (p: p.pname or "" != "poetry") old.nativeBuildInputs; }); + aiofiles = super.aiofiles.overrideAttrs (old: { + buildInputs = (old.buildInputs or []) ++ [ super.hatchling super.hatch-vcs ]; + }); nh3 = let getCargoHash = version: { diff --git a/poetry.lock b/poetry.lock index 2710914d..1ab4a7fe 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,17 @@ # This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +[[package]] +name = "aiofiles" +version = "23.2.1" +description = "File support for asyncio." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiofiles-23.2.1-py3-none-any.whl", hash = "sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107"}, + {file = "aiofiles-23.2.1.tar.gz", hash = "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a"}, +] + [[package]] name = "aiohttp" version = "3.8.6" @@ -109,6 +121,18 @@ yarl = ">=1.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns", "cchardet"] +[[package]] +name = "aioshutil" +version = "1.3" +description = "Asynchronous shutil module." +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aioshutil-1.3-py3-none-any.whl", hash = "sha256:a441c99ef5f9b77fdd429ea7d043b8a358aa9b0c87043868113f9790c9aea400"}, + {file = "aioshutil-1.3.tar.gz", hash = "sha256:ddabe1748c8a71ec3c7d213a4d0cf58fb495c71419334a0575b1f8a3be8373e8"}, +] + [[package]] name = "aiosignal" version = "1.3.1" @@ -2008,4 +2032,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "~3.10" -content-hash = "c6b4effa369f086c7c46316ec2c727c7961895ea9301ba0900f87158ba8abb14" +content-hash = "1c8b8a40d6e4fd2a1534484ea7fc3a91e36e5e9ff029fccc2cc82afdcfdefef8" diff --git a/pyproject.toml b/pyproject.toml index 428d47f4..e42dfb5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,8 @@ tzlocal = "^5.0" colorama = "^0.4.6" aiohttp = "^3.8.4" rich = "^13.3.2" +aiofiles = "^23.2.1" +aioshutil = "^1.3" [tool.poetry.dev-dependencies] pre-commit = "^3.3.3" diff --git a/src/backy/api.py b/src/backy/api.py index 1f4fbc6c..79fc498c 100644 --- a/src/backy/api.py +++ b/src/backy/api.py @@ -177,7 +177,7 @@ async def run_job(self, request: web.Request): async def list_backups(self, request: web.Request) -> List[str]: request["log"].info("list-backups") - return self.daemon.find_dead_backups() + return await self.daemon.find_dead_backups() async def get_backup(self, request: web.Request) -> Backup: name = request.match_info.get("backup_name", None) diff --git a/src/backy/daemon.py b/src/backy/daemon.py index 0c9ef9d4..6a09678d 100644 --- a/src/backy/daemon.py +++ b/src/backy/daemon.py @@ -2,12 +2,13 @@ import fcntl import os import os.path as p -import shutil import signal import sys import time from typing import IO, List, Optional, Pattern +import aiofiles.os as aos +import aioshutil import yaml from structlog.stdlib import BoundLogger @@ -15,7 +16,7 @@ from .revision import filter_manual_tags from .schedule import Schedule from .scheduler import Job -from .utils import has_recent_changes +from .utils import has_recent_changes, is_dir_no_symlink daemon: "BackyDaemon" @@ -298,37 +299,33 @@ def status(self, filter_re: Optional[Pattern[str]] = None) -> List[dict]: return result async def purge_old_files(self): - # `stat` and other file system access things are _not_ - # properly async, we might want to spawn those off into a separate - # thread. while True: try: self.log.info("purge-scanning") - for candidate in os.scandir(self.base_dir): - if not candidate.is_dir(follow_symlinks=False): + for candidate in await aos.scandir(self.base_dir): + if not await is_dir_no_symlink(candidate.path): continue self.log.debug("purge-candidate", candidate=candidate.path) reference_time = time.time() - 3 * 31 * 24 * 60 * 60 - if not has_recent_changes(candidate, reference_time): + if not await has_recent_changes( + candidate.path, reference_time + ): self.log.info("purging", candidate=candidate.path) - shutil.rmtree(candidate) + await aioshutil.rmtree(candidate) self.log.info("purge-finished") except Exception: self.log.exception("purge") await asyncio.sleep(24 * 60 * 60) async def purge_pending_backups(self): - # `stat` and other file system access things are _not_ - # properly async, we might want to spawn those off into a separate - # thread. while True: try: self.log.info("purge-pending-scanning") - for candidate in os.scandir(self.base_dir): + for candidate in await aos.scandir(self.base_dir): if ( - not candidate.is_dir(follow_symlinks=False) - or candidate.name in self.jobs # will get purged anyway - or not p.exists( + candidate.name in self.jobs # will get purged anyway + or not await is_dir_no_symlink(candidate.path) + or not await aos.path.exists( p.join(candidate.path, ".purge_pending") ) ): @@ -340,12 +337,12 @@ async def purge_pending_backups(self): self.log.exception("purge-pending") await asyncio.sleep(24 * 60 * 60) - def find_dead_backups(self) -> List[str]: + async def find_dead_backups(self) -> List[str]: self.log.debug("scanning-backups") return [ b.name - for b in os.scandir(self.base_dir) - if b.is_dir(follow_symlinks=False) and b.name not in self.jobs + for b in await aos.scandir(self.base_dir) + if await is_dir_no_symlink(b.path) and b.name not in self.jobs ] diff --git a/src/backy/tests/test_daemon.py b/src/backy/tests/test_daemon.py index d8522f1a..c03438a4 100644 --- a/src/backy/tests/test_daemon.py +++ b/src/backy/tests/test_daemon.py @@ -19,7 +19,7 @@ @pytest.fixture -async def daemon(tmpdir, event_loop, log): +async def daemon(tmpdir, event_loop, monkeypatch, log): daemon = BackyDaemon(str(tmpdir / "config"), log) base_dir = str(tmpdir) source = str(tmpdir / "test01.source") @@ -59,7 +59,13 @@ async def daemon(tmpdir, event_loop, log): os.mkdir(tmpdir / "dead01") - daemon.start(event_loop) + async def null_coroutine(): + return + + with monkeypatch.context() as m: + m.setattr(daemon, "purge_old_files", null_coroutine) + m.setattr(daemon, "purge_pending_backups", null_coroutine) + daemon.start(event_loop) yield daemon daemon.terminate() diff --git a/src/backy/utils.py b/src/backy/utils.py index 41bb9387..ffc27d04 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -14,6 +14,7 @@ from typing import IO, Callable from zoneinfo import ZoneInfo +import aiofiles.os as aos import humanize import structlog import tzlocal @@ -428,7 +429,11 @@ def min_date(): return datetime.datetime.min.replace(tzinfo=ZoneInfo("UTC")) -def has_recent_changes(entry, reference_time): +async def is_dir_no_symlink(p): + return await aos.path.isdir(p) and not await aos.path.islink(p) + + +async def has_recent_changes(path: str, reference_time: float) -> bool: # This is not efficient on a first look as we may stat things twice, but it # makes the recursion easier to read and the VFS will be caching this # anyway. @@ -436,19 +441,19 @@ def has_recent_changes(entry, reference_time): # higher levels will propagate changed mtimes do to new/deleted files # instead of just modified files in our case and looking at stats when # traversing a directory level is faster than going depth first. - st = entry.stat(follow_symlinks=False) + st = await aos.stat(path, follow_symlinks=False) if st.st_mtime >= reference_time: return True - if not entry.is_dir(follow_symlinks=False): + if not await is_dir_no_symlink(path): return False - candidates = list(os.scandir(entry.path)) # First pass: stat all direct entries - for candidate in candidates: - if candidate.stat(follow_symlinks=False).st_mtime >= reference_time: + for candidate in await aos.scandir(path): + st = await aos.stat(candidate.path, follow_symlinks=False) + if st.st_mtime >= reference_time: return True # Second pass: start traversing - for candidate in os.scandir(entry.path): - if has_recent_changes(candidate, reference_time): + for candidate in await aos.scandir(path): + if await has_recent_changes(candidate.path, reference_time): return True return False