Skip to content

Commit

Permalink
feat: avoid superfluous calls to os.walk when relinking missing files
Browse files Browse the repository at this point in the history
  • Loading branch information
Toby222 committed Nov 29, 2024
1 parent 262893a commit e5e7b47
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
8 changes: 7 additions & 1 deletion tagstudio/src/core/library/json/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ class Library:
def __init__(self) -> None:
# Library Info =========================================================
self.library_dir: Path = None
# Cached result of os.walk when relinking library files
self._library_file_cache: list[(string, list[string], list[string])] = None

# Entries ==============================================================
# List of every Entry object.
Expand Down Expand Up @@ -1092,6 +1094,7 @@ def fix_missing_files(self):
yield (i, True)
else:
yield (i, False)
self._library_file_cache = None

# self._purge_empty_missing_entries()

Expand Down Expand Up @@ -1124,9 +1127,12 @@ def _match_missing_file(self, file: str) -> list[Path]:

matches = []

if self._library_file_cache is None:
self._library_file_cache = list(os.walk(self.library_dir))

# for file in self.missing_files:
path = Path(file)
for root, dirs, files in os.walk(self.library_dir):
for root, dirs, files in self._library_file_cache:
for f in files:
# print(f'{tail} --- {f}')
if path.name == f and "$recycle.bin" not in str(root).lower():
Expand Down
12 changes: 8 additions & 4 deletions tagstudio/src/core/utils/missing_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class MissingRegistry:
library: Library
files_fixed_count: int = 0
missing_files: list[Entry] = field(default_factory=list)
library_file_cache: set[Path] = None

@property
def missing_files_count(self) -> int:
Expand All @@ -40,15 +41,17 @@ def match_missing_file(self, match_item: Entry) -> list[Path]:
Works if files were just moved to different subfolders and don't have duplicate names.
"""
matches = []
for item in self.library.library_dir.glob(f"**/{match_item.path.name}"):
if item.name == match_item.path.name: # TODO - implement IGNORE_ITEMS
new_path = Path(item).relative_to(self.library.library_dir)
matches.append(new_path)
# TODO - implement IGNORE_ITEMS
for matched_path in filter(lambda file: file.name == match_item.path.name, self.library_file_cache):
logger.info("match_missing_files", matched_path=matched_path)
new_path = Path(matched_path).relative_to(self.library.library_dir)
matches.append(new_path)

return matches

def fix_missing_files(self) -> Iterator[int]:
"""Attempt to fix missing files by finding a match in the library directory."""
self.library_file_cache = set(self.library.library_dir.rglob("*"))
self.files_fixed_count = 0
for i, entry in enumerate(self.missing_files, start=1):
item_matches = self.match_missing_file(entry)
Expand All @@ -59,6 +62,7 @@ def fix_missing_files(self) -> Iterator[int]:
# remove fixed file
self.missing_files.remove(entry)
yield i
self.library_file_cache = None

def execute_deletion(self) -> Iterator[int]:
for i, missing in enumerate(self.missing_files, start=1):
Expand Down

0 comments on commit e5e7b47

Please sign in to comment.