From 395bc04814cababdc925b027f3520eef10295018 Mon Sep 17 00:00:00 2001 From: exdysa <91800957+exdysa@users.noreply.github.com> Date: Sun, 5 Jan 2025 13:57:15 -0500 Subject: [PATCH 1/2] Linted, adjust folder contents, simplify UI clear operations, add more docstrings --- dataset_tools/__init__.py | 56 ++++---- dataset_tools/main.py | 11 +- dataset_tools/metadata_parser.py | 73 ++++++----- dataset_tools/ui.py | 216 +++++++++++++++---------------- dataset_tools/widgets.py | 88 ++++++++----- tests/test_md_ps.py | 8 +- 6 files changed, 248 insertions(+), 204 deletions(-) diff --git a/dataset_tools/__init__.py b/dataset_tools/__init__.py index 9ec5fa8..1daf5bb 100644 --- a/dataset_tools/__init__.py +++ b/dataset_tools/__init__.py @@ -1,48 +1,43 @@ +""" 初始化""" + # pylint: disable=line-too-long - # 初始化 - -from importlib.metadata import version, PackageNotFoundError -from re import I # setuptools-scm versioning -try: - __version__ = version("dataset-tools") -except PackageNotFoundError: - # package is not installed - pass - + # from re import I # setuptools-scm versioning +import os import sys -if "pytest" not in sys.modules: - import argparse - from typing import Literal +from importlib.metadata import version, PackageNotFoundError +import logging +import argparse +from typing import Literal - levels = {"d": "DEBUG", "w": "WARNING", "e": "ERROR", "c": "CRITICAL", "i": "INFO"} +from rich.logging import RichHandler +from rich.console import Console +if "pytest" not in sys.modules: parser = argparse.ArgumentParser(description="Set logging level.") group = parser.add_mutually_exclusive_group() - choices = list(levels.keys()) + [v for v in levels.values()] + [v.upper() for v in levels.values()] + + levels = {"d": "DEBUG", "w": "WARNING", "e": "ERROR", "c": "CRITICAL", "i": "INFO"} + choices = list(levels.keys()) + list(levels.values()) + [v.upper() for v in levels.values()] for short, long in levels.items(): group.add_argument(f'-{short}', f'--{long.lower()}', f'--{long}', action='store_true', help=f"Set logging level {long}") group.add_argument('--log-level', default='i', type=str, - choices=choices, help=f"Set the logging level ({choices})") + choices=choices, help=f"Set the logging level ({choices})") args = parser.parse_args() # Resolve log_level from args dynamically - log_level = levels[next(iter([k for k,v in levels.items() if getattr(args, v.lower(), False)]), args.log_level)] + LOG_LEVEL = levels[next(iter([k for k,v in levels.items() if getattr(args, v.lower(), False)]), args.log_level)] else: - log_level = "DEBUG" -EXC_INFO: bool = log_level != "i" + LOG_LEVEL = "DEBUG" -import logging -from logging import Logger -import sys +EXC_INFO: bool = LOG_LEVEL != "i" + +#begin routine +msg_init = None # pylint: disable=invalid-name -msg_init = None -from rich.logging import RichHandler -from rich.console import Console -from rich.logging import RichHandler handler = RichHandler(console=Console(stderr=True)) if handler is None: @@ -54,12 +49,17 @@ datefmt="%Y-%m-%d %H:%M:%S", ) handler.setFormatter(formatter) -logging.root.setLevel(log_level) +logging.root.setLevel(LOG_LEVEL) logging.root.addHandler(handler) if msg_init is not None: logger = logging.getLogger(__name__) logger.info(msg_init) -log_level = getattr(logging, log_level) +log_level = getattr(logging, LOG_LEVEL) logger = logging.getLogger(__name__) + +try: + __version__ = version("dataset-tools") +except PackageNotFoundError: + logger.info("dataset-tools package is not installed. Did you run `pip install .`?", exc_info=EXC_INFO) diff --git a/dataset_tools/main.py b/dataset_tools/main.py index 1f27177..31f9a7e 100644 --- a/dataset_tools/main.py +++ b/dataset_tools/main.py @@ -1,10 +1,17 @@ +"""啟動程式,退出程式""" import sys + +from PyQt6 import QtWidgets # ignore + +from dataset_tools import logger from dataset_tools.ui import MainWindow # Import our main window class def main(): - from PyQt6.QtWidgets import QApplication - app = QApplication(sys.argv) + """Launch application""" + logger.info("%s","Launching application...") + + app = QtWidgets.QApplication(sys.argv) # pylint: disable=c-extension-no-member window = MainWindow() # Initialize our main window. window.show() sys.exit(app.exec()) diff --git a/dataset_tools/metadata_parser.py b/dataset_tools/metadata_parser.py index 9736565..5c432e1 100644 --- a/dataset_tools/metadata_parser.py +++ b/dataset_tools/metadata_parser.py @@ -1,11 +1,12 @@ +"""為使用者介面清理和安排元資料""" + # pylint: disable=line-too-long import re import json -from collections import defaultdict -from dataset_tools import logger -from PIL import Image - +from PIL import Image +from PIL.ExifTags import TAGS +from dataset_tools import logger def open_jpg_header(file_path_named: str) -> dict: """ @@ -13,13 +14,13 @@ def open_jpg_header(file_path_named: str) -> dict: :param file_path_named: `str` The path and file name of the jpg file :return: `Generator[bytes]` Generator element containing header tags """ - from PIL.ExifTags import TAGS - pil_img = Image.open(file_path_named) - exif_info = pil_img._getexif() - exif = {TAGS.get(k, k): v for k, v in exif_info.items()} + pil_image = Image.open(file_path_named) + info = pil_image.info + if info is None: + return None + exif = {TAGS.get(k, k): v for k, v in info.items()} return exif - def open_png_header(file_path_named: str) -> dict: """ Open png format files\n @@ -27,10 +28,10 @@ def open_png_header(file_path_named: str) -> dict: :return: `Generator[bytes]` Generator element containing header bytes """ pil_img = Image.open(file_path_named) - if pil_img == None: - pil_img.load() + if pil_img is None: # We dont need to load completely unless totally necessary + pil_img.load() # This is the case when we have no choice but to load (slower) text_chunks = pil_img.info - logger.debug(text_chunks) + logger.debug("%s",f"{text_chunks}") return text_chunks @@ -47,10 +48,9 @@ def format_chunk(text_chunks: dict) -> dict: clean_segments = [seg.replace(buffer, '') for seg in segmented_string] cleaned_text = ' '.join(map(str,clean_segments)).replace('\n',',') - logger.debug(f"{cleaned_text}") + logger.debug("%s",f"{cleaned_text}") return cleaned_text - def extract_enclosed_values(cleaned_text: str) -> tuple[str, list]: """ Split string by pre-delineated tag information\n @@ -63,13 +63,16 @@ def extract_enclosed_values(cleaned_text: str) -> tuple[str, list]: structured_dict = {} for item in prestructured_data: # Only keep non-empty groups - if item[1]: structured_dict[item[0]] = item[1] - elif item[3]: structured_dict[item[2]] = item[3] - else: structured_dict['Hashes'] = item[4] + if item[1]: + structured_dict[item[0]] = item[1] + elif item[3]: + structured_dict[item[2]] = item[3] + else: + structured_dict['Hashes'] = item[4] dehashed_text = re.sub(pattern, ',', cleaned_text).strip() - logger.debug(f"{dehashed_text}") - logger.debug(f"{structured_dict}") + logger.debug("%s",f"{dehashed_text}") + logger.debug("%s",f"{structured_dict}") return dehashed_text, structured_dict def structured_metadata_list_to_dict(prestructured_data: list) -> dict: @@ -87,7 +90,7 @@ def structured_metadata_list_to_dict(prestructured_data: list) -> dict: system_metadata[ti_hash_key] = prestructured_data[key] else: # Hardware Info, strip quotes""" system_metadata[key.split(' ', 1)[0]] = prestructured_data[key].strip('"') - logger.debug(f"{system_metadata}") + logger.debug("%s", f"{system_metadata}") return system_metadata @@ -104,23 +107,24 @@ def dehashed_metadata_str_to_dict(dehashed_text: str) -> dict: # Rest of dehashed as a dict: dehashed_pairs = [p for p in dehashed_text.split(',') if ': ' in p and p.strip()] + neg_side = dehashed_pairs[0].split('Steps:') match = re.search(r'Negative prompt:\s*(.*?)Steps', dehashed_pairs[0]) negative = match.group(1) if match else None - logger.debug(negative) - logger.debug(neg_side) + logger.debug("%s",f"{negative}") + logger.debug("%s",f"{neg_side}") - logger.debug(negative) - logger.debug(f"{dehashed_pairs}") + logger.debug("%s",f"{negative}") + logger.debug("%s",f"{dehashed_pairs}") - generation_metadata = {k: v for k, v in (pair.split(': ', 1) for pair in dehashed_pairs)} + generation_metadata = dict((pair.split(': ', 1) for pair in dehashed_pairs)) logger.debug(generation_metadata) positive = next(iter(pos_key_val)) # Sample positive prompt positive_prompt = pos_key_val[positive] # Separate prompts prompt_metadata = {"Positive prompt" : positive_prompt } | generation_metadata - logger.debug(f"{prompt_metadata}") + logger.debug("%s",f"{prompt_metadata}") return prompt_metadata, generation_metadata def parse_metadata(file_path_named: str) -> dict: @@ -130,19 +134,24 @@ def parse_metadata(file_path_named: str) -> dict: :return: `dict` The metadata from the header of the file """ header_chunks = open_png_header(file_path_named) - if next(iter(header_chunks)) == 'parameters': - logger.debug(next(iter(header_chunks))) + + metadata = None + + if next(iter(header_chunks)) == 'parameters': # A1111 format + logger.debug("%s",f"{next(iter(header_chunks))}") cleaned_text = format_chunk(header_chunks) dehashed_text, structured_dict = extract_enclosed_values(cleaned_text) system_metadata = structured_metadata_list_to_dict(structured_dict) prompt_metadata, generation_metadata = dehashed_metadata_str_to_dict(dehashed_text) - logger.debug(f"{prompt_metadata | generation_metadata | system_metadata}") + logger.debug("%s",f"{prompt_metadata, generation_metadata, system_metadata}") + logger.debug("%s",f"{type(prompt_metadata), type(generation_metadata), type(system_metadata)}") + elif next(iter(header_chunks)) == 'prompt': - """Placeholder""" + # """Placeholder""" + pass metadata = {"Prompts": prompt_metadata, "Settings": generation_metadata, "System": system_metadata} return metadata - # hash_sample = re.search(r', cleaned_text) # hash_sample_structure = eval(hash_sample.group(1)) # Return result 1 if found else 0 - #dehashed_text = re.sub(r'Hashes: \{.*?\}', '', cleaned_text).strip() \ No newline at end of file + #dehashed_text = re.sub(r'Hashes: \{.*?\}', '', cleaned_text).strip() diff --git a/dataset_tools/ui.py b/dataset_tools/ui.py index 99eec9e..17a3ecd 100644 --- a/dataset_tools/ui.py +++ b/dataset_tools/ui.py @@ -1,36 +1,27 @@ -from importlib import metadata -from xml.dom.minidom import parseString -from dataset_tools import logger +"""App Ui""" + # pylint: disable=line-too-long + # pylint: disable=c-extension-no-member +# pylint: disable=attribute-defined-outside-init + +from encodings import utf_8 import pprint import os +from pathlib import Path as p +from PyQt6 import QtWidgets as Qw +from PyQt6 import QtCore, QtGui -from PyQt6.QtWidgets import ( - QMainWindow, - QWidget, - QVBoxLayout, - QHBoxLayout, - QPushButton, - QLabel, - QFileDialog, - QProgressBar, - QListWidget, - QAbstractItemView, - QTextEdit -) -from PyQt6.QtCore import Qt, QThread, pyqtSignal -from PyQt6.QtGui import QFont, QPixmap -from dataset_tools.widgets import FileLoader -import imghdr -from dataset_tools.metadata_parser import parse_metadata, open_jpg_header + +from dataset_tools import logger from dataset_tools import EXC_INFO -import re +from dataset_tools.metadata_parser import parse_metadata, open_jpg_header +from dataset_tools.widgets import FileLoader, Ext -class MainWindow(QMainWindow): +class MainWindow(Qw.QMainWindow): + """"Consolidated raw functions and behavior of window""" def __init__(self): super().__init__() - logger.info("Launching application...") # Set a default font for the app - # app_font = QFont("Arial", 12) + # app_font = QtGui.QFont("Arial", 12) # self.setFont(app_font) self.setWindowTitle("Dataset Viewer") @@ -38,126 +29,138 @@ def __init__(self): self.setMinimumSize(800, 600) # set minimum size for standard window. # Central widget to hold our layout - central_widget = QWidget() + central_widget = Qw.QWidget() self.setCentralWidget(central_widget) # Main layout - main_layout = QHBoxLayout(central_widget) + main_layout = Qw.QHBoxLayout(central_widget) # Left panel layout - left_panel = QWidget() - left_layout = QVBoxLayout(left_panel) + left_panel = Qw.QWidget() + left_layout = Qw.QVBoxLayout(left_panel) main_layout.addWidget(left_panel) # Folder Path Label - self.current_folder_label = QLabel("Current Folder: None") + self.current_folder_label = Qw.QLabel("Current Folder: None") left_layout.addWidget(self.current_folder_label) # Placeholder UI - self.open_folder_button = QPushButton("Open Folder") + self.open_folder_button = Qw.QPushButton("Open Folder") self.open_folder_button.clicked.connect(self.open_folder) left_layout.addWidget(self.open_folder_button) # Placeholder label, you can remove this later - self.message_label = QLabel("Select a folder!") + self.message_label = Qw.QLabel("Select a folder!") left_layout.addWidget(self.message_label) - # File list (replaced QLabel with QListWidget) - self.files_list = QListWidget() - self.files_list.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection) + # File list (replaced Qw.QLabel with Qw.QListWidget) + self.files_list = Qw.QListWidget() + self.files_list.setSelectionMode(Qw.QAbstractItemView.SelectionMode.SingleSelection) self.files_list.itemClicked.connect(self.on_file_selected) left_layout.addWidget(self.files_list) # Add a progress bar for file loading - self.progress_bar = QProgressBar() + self.progress_bar = Qw.QProgressBar() self.progress_bar.hide() left_layout.addWidget(self.progress_bar) # Right panel Layout - right_panel = QWidget() - right_layout = QVBoxLayout(right_panel) + right_panel = Qw.QWidget() + right_layout = Qw.QVBoxLayout(right_panel) main_layout.addWidget(right_panel) # Image preview area - self.image_preview = QLabel() - self.image_preview.setAlignment(Qt.AlignmentFlag.AlignCenter) + self.image_preview = Qw.QLabel() + self.image_preview.setAlignment(QtCore.Qt.AlignmentFlag.AlignCenter) self.image_preview.setMinimumHeight(300) right_layout.addWidget(self.image_preview) # Right top separator - self.top_separator = QLabel() + self.top_separator = Qw.QLabel() self.top_separator.setText("Prompt Info will show here") self.top_separator.setMinimumWidth(400) right_layout.addWidget(self.top_separator) # Upper Right box - self.upper_box = QTextEdit() + self.upper_box = Qw.QTextEdit() self.upper_box.setReadOnly(True) self.upper_box.setMinimumWidth(400) right_layout.addWidget(self.upper_box) # Right boxes separator - self.separator_text = QLabel() - self.separator_text.setText("Generation Info will show here") - self.separator_text.setMinimumWidth(400) - right_layout.addWidget(self.separator_text) + self.mid_separator = Qw.QLabel() + self.mid_separator.setText("Generation Info will show here") + self.mid_separator.setMinimumWidth(400) + right_layout.addWidget(self.mid_separator) # Lower Right box - self.lower_box = QTextEdit() + self.lower_box = Qw.QTextEdit() self.lower_box.setMinimumWidth(400) self.lower_box.setReadOnly(True) right_layout.addWidget(self.lower_box) - self.file_loader = None - self.file_list = [] - self.image_list = [] - self.text_files = [] self.current_folder = None + self.clear_file_list() + logger.debug("%s","File List cleared") def open_folder(self): - # Open a dialog to select the folder - folder_path = QFileDialog.getExistingDirectory(self, "Select a folder") + """Open a dialog to select the folder""" + folder_path = Qw.QFileDialog.getExistingDirectory(self, "Select a folder") + logger.debug("%s",f"Folder opened {folder_path}") if folder_path: # Call the file loading function self.load_files(folder_path) def clear_files(self): - if self.file_loader: - self.file_loader.clear_files() - self.file_list = [] - self.image_list = [] - self.text_files = [] - self.files_list.clear() - self.image_preview.clear() - self.lower_box.clear() - self.separator_text.clear() - self.upper_box.clear() - self.top_separator.clear() + """Empty all field displays""" + if self.file_loader: + self.file_loader.clear_files() + self.files_list.clear() + self.clear_file_list() + logger.debug("%s","File List cleared anew") + self.clear_selection() + logger.debug("%s","Selection cleared") + + def clear_file_list(self): + """Initialize or re-initialize display of files""" + self.file_list = [] + logger.debug("%s",f"File List Initialized {self.file_list}") + self.image_list = [] + self.text_files = [] + + def clear_selection(self): + """Empty file metadata display""" + self.image_preview.clear() + self.lower_box.clear() + self.mid_separator.clear() + self.upper_box.clear() + self.top_separator.clear() def load_files(self, folder_path): - # Start background loading of files using QThread + """Start background loading of files using QThread""" self.current_folder = folder_path self.current_folder_label.setText(f"Current Folder: {folder_path}") self.message_label.setText("Loading files...") self.progress_bar.setValue(0) self.progress_bar.show() - if self.file_loader: self.file_loader.finished.disconnect() self.file_loader = FileLoader(folder_path) self.file_loader.progress.connect(self.update_progress) self.file_loader.finished.connect(self.on_files_loaded) self.file_loader.start() + logger.debug("%s",f"Loading files from {folder_path}...") def update_progress(self, progress): - # Update progress bar + """Update progress bar""" self.progress_bar.setValue(progress) def on_files_loaded(self, image_list, text_files, loaded_folder): + """Callback for working folder contents""" if self.current_folder != loaded_folder: # We are loading files from a different folder # than what's currently selected, so we need to ignore this. @@ -168,72 +171,67 @@ def on_files_loaded(self, image_list, text_files, loaded_folder): self.message_label.setText(f"Loaded {len(self.image_list)} images and {len(self.text_files)} text files") self.progress_bar.hide() - # Clear and populate the QListWidget + # Clear and populate the Qw.QListWidget self.files_list.clear() self.files_list.addItems(self.image_list) self.files_list.addItems(self.text_files) def on_file_selected(self, item): + """Activate metadta on nab function""" file_path = item.text() self.message_label.setText(f"Selected {os.path.normpath(os.path.basename(file_path))}") # Clear any previous selection - self.image_preview.clear() - self.lower_box.clear() - self.separator_text.clear() - self.upper_box.clear() - self.top_separator.clear() + self.clear_selection() - if file_path.lower().endswith(('.png','.jpg','.jpeg','.webp')): - # Load the image - self.load_image_preview(file_path) - metadata = self.load_metadata(file_path) - self.display_metadata(metadata, file_path) + extension = p(file_path).suffix.lower() + self.load_image_preview(file_path) + metadata = self.load_metadata(file_path, extension) - if file_path.lower().endswith('.txt'): + if extension in Ext.TEXT: # Load the text file self.load_text_file(file_path) - def load_metadata(self, file_path): - metadata = None - try: - if imghdr.what(file_path) == 'png': - metadata = parse_metadata(file_path) - - elif imghdr.what(file_path) in ['jpeg', 'jpg']: - metadata = open_jpg_header(file_path) + self.display_metadata(metadata, file_path) + def load_metadata(self, file_path: str, extension: str='.png') -> dict: + """ + Fetch metadata from file\n + :param file_path: `str` The file to interpret + :param extension'': + :param : + :return: + """ + try: + metadata = open_jpg_header(file_path) if (extension == Ext.JPEG or extension == Ext.WEBP) else parse_metadata(file_path) except IndexError as error_log: - logger.info(f"Unexpected list position, out of range error for metadata in {file_path}, {error_log}", exc_info=EXC_INFO) - pass + logger.info("Unexpected list position, out of range error for metadata in %s", f"{file_path}, {error_log}", exc_info=EXC_INFO) except UnboundLocalError as error_log: - logger.info(f"Variable not declared while extracting metadata from {file_path}, {error_log}", exc_info=EXC_INFO) - pass + logger.info("Variable not declared while extracting metadata from %s", f"{file_path}, {error_log}", exc_info=EXC_INFO) except ValueError as error_log: - logger.info(f"Invalid dictionary formatting while extracting metadata from {file_path}, {error_log}", exc_info=EXC_INFO) - pass - + logger.info("Invalid dictionary formatting while extracting metadata from %s", f"{file_path}, {error_log}", exc_info=EXC_INFO) else: return metadata def display_metadata(self, metadata, file_path): + """direct collated data to fields and pretty print there""" if metadata is not None: + logger.debug("%s",f"{metadata}") prompt_keys = ['Positive prompt','Negative prompt', 'Prompt'] self.top_separator.setText('Prompt Data:') - self.separator_text.setText('Generation Data:') + self.mid_separator.setText('Generation Data:') try: - prompt_data = metadata['Prompts'] + prompt_data = metadata[next(iter(metadata))] prompt_fields = f"{prompt_data.get('Positive prompt')}\n{prompt_data.get('Negative prompt')}" - logger.debug(prompt_data.get('Positive prompt')) + logger.debug("%s",f"{prompt_data.get('Positive prompt')}") self.upper_box.setText(prompt_fields) except TypeError as error_log: - logger.info(f"Invalid data in prompt fields {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) - pass - + logger.info("Invalid data in prompt fields %s", f" {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) + except KeyError as error_log: + logger.info("Invalid key name for %s", f" {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) except AttributeError as error_log: - logger.info(f"Attribute cannot be applied to type {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) - pass + logger.info("Attribute cannot be applied to type %s", f" {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) try: not_prompt = {k: v for k, v in metadata.get('Prompts').items() if k not in prompt_keys and metadata.get('Prompts', None) is not None} @@ -241,17 +239,17 @@ def display_metadata(self, metadata, file_path): self.lower_box.setText(pprint.pformat(generation_data)) except AttributeError as error_log: - logger.info(f"'items' attribute cannot be applied to type {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) - pass + logger.info("'items' attribute cannot be applied to type %s", f" {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) def load_image_preview(self, file_path): - # load image file - pixmap = QPixmap(file_path) + """Show preview of image file""" + pixmap = QtGui.QPixmap(file_path) # scale the image - self.image_preview.setPixmap(pixmap.scaled(self.image_preview.size(), Qt.AspectRatioMode.KeepAspectRatio, Qt.TransformationMode.SmoothTransformation)) + self.image_preview.setPixmap(pixmap.scaled(self.image_preview.size(), QtCore.Qt.AspectRatioMode.KeepAspectRatio, QtCore.Qt.TransformationMode.SmoothTransformation)) def load_text_file(self, file_path): - with open(file_path, 'r') as f: + """Read metadata inside a text file""" + with open(file_path, 'r', encoding=utf_8) as f: content = f.read() - self.lower_box.setText(content) \ No newline at end of file + self.lower_box.setText(content) diff --git a/dataset_tools/widgets.py b/dataset_tools/widgets.py index 61cb3ba..aec8171 100644 --- a/dataset_tools/widgets.py +++ b/dataset_tools/widgets.py @@ -1,10 +1,22 @@ +"""Widget contents""" + import os -from PyQt6.QtCore import QThread, pyqtSignal -from dataset_tools import logger +from pathlib import Path as p +from PyQt6 import QtCore + +from dataset_tools import logger, EXC_INFO -class FileLoader(QThread): - finished = pyqtSignal(list, list, str) - progress = pyqtSignal(int) +class Ext(list[str]): + """Valid file formats for metadata reading""" + PNG_ = [".png"] + JPEG = ['.jpg','.jpeg'] + WEBP = ['.webp'] + TEXT = ['.txt'] + +class FileLoader(QtCore.QThread): # pylint: disable=c-extension-no-member + """Opens files in the UI""" + finished = QtCore.pyqtSignal(list, list, str) # pylint: disable=c-extension-no-member + progress = QtCore.pyqtSignal(int) # pylint: disable=c-extension-no-member def __init__(self, folder_path): super().__init__() @@ -14,30 +26,48 @@ def __init__(self, folder_path): self.text_files = [] def run(self): - self.images, self.text_files = self._scan_directory(self.folder_path) + """Open selected folder""" + folder_contents = self.scan_directory(self.folder_path) + self.images, self.text_files = self.populate_index_from_list(folder_contents) self.finished.emit(self.images, self.text_files, self.folder_path) - def _scan_directory(self, folder_path): - files = [] - images = [] - text_files = [] - # Gather paths to all files in the selected folder - try: - all_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)] - except FileNotFoundError: - return images, text_files - total_files = len(all_files) - progress = 0 - for index, file_path in enumerate(all_files): - if os.path.isfile(file_path): - # Filter the file types as needed - if file_path.lower().endswith(('.png','.jpg','.jpeg','.webp')): - images.append(file_path) - if file_path.lower().endswith(('.txt')): - text_files.append(file_path) - progress = (index + 1)/total_files * 100 - self.progress.emit(int(progress)) - return images, text_files - + def scan_directory(self, folder_path:str ) -> list: + """ + # Gather paths to all files in the selected folder\n + :param folder_path: `str` The directory to scan + :return: `list` The file contents of the directory + """ + + try: + folder_contents = [os.path.join(folder_path, f) for f in os.listdir(folder_path)] + except FileNotFoundError as error_log: + logger.info("Error loading folder %s",f"{folder_path} {error_log}", exc_info=EXC_INFO) + else: + return folder_contents + + def populate_index_from_list(self,folder_contents: list)-> tuple[list]: + """ + Create an index of relevant files from a list\n + :param : + :return: `tuple[list]` Images and text files that can be loaded by the system + """ + image_files = [] + text_files = [] + file_count = len(folder_contents) + progress = 0 + for index, file_path in enumerate(folder_contents): + if os.path.isfile(file_path): + logger.debug("%s",f"{file_path}") + # Filter the file types as needed + if p(file_path).suffix.lower() in Ext.PNG_ or Ext.JPEG or Ext.WEBP: + image_files.append(file_path) + if p(file_path).suffix.lower() in Ext.TEXT: + text_files.append(file_path) + logger.debug("%s",f"{image_files, text_files}") + progress = (index + 1)/file_count * 100 + self.progress.emit(int(progress)) + return image_files, text_files + def clear_files(self): - self.files = [] \ No newline at end of file + """Empty file ilst""" + self.files = [] diff --git a/tests/test_md_ps.py b/tests/test_md_ps.py index dce4e58..3f87028 100644 --- a/tests/test_md_ps.py +++ b/tests/test_md_ps.py @@ -1,6 +1,6 @@ - -import unittest import os +import unittest + from dataset_tools import logger from dataset_tools.metadata_parser import ( format_chunk, @@ -10,7 +10,6 @@ extract_enclosed_values ) - class TestParseMetadata(unittest.TestCase): def setUp(self): @@ -43,13 +42,14 @@ def test_structured_metadata_list_to_dict(self): _, structured_dict = extract_enclosed_values(self.cleaned_text) final_text = structured_metadata_list_to_dict(structured_dict) expected_output = self.structured_text + logger.debug("%s",f"{final_text}") assert final_text == expected_output def test_dehashed_metadata_str_to_dict(self): prompt_metadata, _ = extract_enclosed_values(self.cleaned_text) final_text = dehashed_metadata_str_to_dict(prompt_metadata) expected_output = self.dehashed_text - print(final_text) + logger.debug("%s",f"{final_text}") assert final_text == expected_output if __name__ == '__main__': From 1c1f2775b6800fdb89b802a86dd061f02a959147 Mon Sep 17 00:00:00 2001 From: exdysa <91800957+exdysa@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:02:24 -0500 Subject: [PATCH 2/2] Rework Preliminary ComfyUI tag format support --- dataset_tools/metadata_parser.py | 83 ++++++++++++++++++++++++++------ dataset_tools/ui.py | 10 ++-- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/dataset_tools/metadata_parser.py b/dataset_tools/metadata_parser.py index 5c432e1..e7cf003 100644 --- a/dataset_tools/metadata_parser.py +++ b/dataset_tools/metadata_parser.py @@ -75,6 +75,16 @@ def extract_enclosed_values(cleaned_text: str) -> tuple[str, list]: logger.debug("%s",f"{structured_dict}") return dehashed_text, structured_dict +def clean_with_json(prestructured_data:dict, key_name: str) -> dict: + """ + Use json loads to arrange half-formatted dictinto valid dict\n + :param prestructured_data: `dict` A dict with a single working key + :param key_name: `str` The single working key name + :return: `dict` A formatted dictionary object + """ + cleaned_data = json.loads(prestructured_data[key_name]) + return cleaned_data + def structured_metadata_list_to_dict(prestructured_data: list) -> dict: """ Convert delineated metadata into a dictionary\n @@ -83,8 +93,9 @@ def structured_metadata_list_to_dict(prestructured_data: list) -> dict: """ system_metadata = {} for key in prestructured_data: + logger.debug(key) if key == 'Hashes': - system_metadata[key] = json.loads(prestructured_data[key]) + system_metadata[key] = clean_with_json(prestructured_data, key) elif ': "' in key and ':' in key: # Handle TI hashes, split by colon and quote ti_hash_key = re.sub(r': .*$', '', key).strip() system_metadata[ti_hash_key] = prestructured_data[key] @@ -120,13 +131,61 @@ def dehashed_metadata_str_to_dict(dehashed_text: str) -> dict: generation_metadata = dict((pair.split(': ', 1) for pair in dehashed_pairs)) logger.debug(generation_metadata) - positive = next(iter(pos_key_val)) # Sample positive prompt - positive_prompt = pos_key_val[positive] # Separate prompts + positive = next(iter(pos_key_val), "") # Sample positive prompt + positive_prompt = pos_key_val.get(positive,None) # Separate prompts prompt_metadata = {"Positive prompt" : positive_prompt } | generation_metadata logger.debug("%s",f"{prompt_metadata}") return prompt_metadata, generation_metadata +def arrange_webui_metadata(header_chunks:str) -> dict: + """ + Using the header from a file, send to multiple formatting, cleaning, and parsing, processes \n + Return format : {"Prompts": , "Settings": , "System": } \n + :param header_chunks: `str` Header data from a file + :return: `dict` Metadata in a standardized format + """ + cleaned_text = format_chunk(header_chunks) + dehashed_text, structured_dict = extract_enclosed_values(cleaned_text) + system_metadata = structured_metadata_list_to_dict(structured_dict) + prompt_metadata, generation_metadata = dehashed_metadata_str_to_dict(dehashed_text) + logger.debug("%s",f"{prompt_metadata, generation_metadata, system_metadata}") + logger.debug("%s",f"{type(prompt_metadata), type(generation_metadata), type(system_metadata)}") + return {"Prompts": prompt_metadata, "Settings": generation_metadata, "System": system_metadata} + +def arrange_nodeui_metadata(header_chunks:str) ->dict: + """ + Using the header from a file, run formatting and parsing processes \n + Return format : {"Prompts": , "Settings": , "System": } \n + :param header_chunks: `str` Header data from a file + :return: `dict` Metadata in a standardized format + """ + test_metadata = clean_with_json(header_chunks, 'prompt') + search_keys = ["text"] + prompt_data = {} + between_items= {} + misc_items = {} + + logger.debug(test_metadata) + #or any(x in value.get('inputs') for x in search_keys): + for i, key in enumerate(test_metadata): + logger.debug(i) + value = test_metadata[key] + logger.debug(value) + key_name = value.get('class_type') + if "CLIPTextEncode" in key_name: + existing = prompt_data.get(key_name,{}) + logger.debug(existing) + prompt_data.setdefault(key_name, existing | test_metadata[key].get('inputs')) + elif i <= 2: + between_items[key_name] = value['inputs'] + else: + misc_items[key_name] = value['inputs'] + prompt_items = {"Prompt": prompt_data } + + return {'Prompts': prompt_items, 'Settings': between_items, "System": misc_items} + + def parse_metadata(file_path_named: str) -> dict: """ Extract the metadata from the header of an image file\n @@ -134,22 +193,14 @@ def parse_metadata(file_path_named: str) -> dict: :return: `dict` The metadata from the header of the file """ header_chunks = open_png_header(file_path_named) - + logger.debug("%s",f"{next(iter(header_chunks))}") metadata = None if next(iter(header_chunks)) == 'parameters': # A1111 format - logger.debug("%s",f"{next(iter(header_chunks))}") - cleaned_text = format_chunk(header_chunks) - dehashed_text, structured_dict = extract_enclosed_values(cleaned_text) - system_metadata = structured_metadata_list_to_dict(structured_dict) - prompt_metadata, generation_metadata = dehashed_metadata_str_to_dict(dehashed_text) - logger.debug("%s",f"{prompt_metadata, generation_metadata, system_metadata}") - logger.debug("%s",f"{type(prompt_metadata), type(generation_metadata), type(system_metadata)}") - - elif next(iter(header_chunks)) == 'prompt': - # """Placeholder""" - pass - metadata = {"Prompts": prompt_metadata, "Settings": generation_metadata, "System": system_metadata} + metadata = arrange_webui_metadata(header_chunks) + elif next(iter(header_chunks)) == 'prompt': # ComfyUI format + metadata = arrange_nodeui_metadata(header_chunks) + return metadata # hash_sample = re.search(r', cleaned_text) diff --git a/dataset_tools/ui.py b/dataset_tools/ui.py index 17a3ecd..919b901 100644 --- a/dataset_tools/ui.py +++ b/dataset_tools/ui.py @@ -203,6 +203,7 @@ def load_metadata(self, file_path: str, extension: str='.png') -> dict: :param : :return: """ + metadata = None try: metadata = open_jpg_header(file_path) if (extension == Ext.JPEG or extension == Ext.WEBP) else parse_metadata(file_path) except IndexError as error_log: @@ -211,8 +212,7 @@ def load_metadata(self, file_path: str, extension: str='.png') -> dict: logger.info("Variable not declared while extracting metadata from %s", f"{file_path}, {error_log}", exc_info=EXC_INFO) except ValueError as error_log: logger.info("Invalid dictionary formatting while extracting metadata from %s", f"{file_path}, {error_log}", exc_info=EXC_INFO) - else: - return metadata + return metadata def display_metadata(self, metadata, file_path): """direct collated data to fields and pretty print there""" @@ -222,10 +222,8 @@ def display_metadata(self, metadata, file_path): self.top_separator.setText('Prompt Data:') self.mid_separator.setText('Generation Data:') try: - prompt_data = metadata[next(iter(metadata))] - prompt_fields = f"{prompt_data.get('Positive prompt')}\n{prompt_data.get('Negative prompt')}" - logger.debug("%s",f"{prompt_data.get('Positive prompt')}") - self.upper_box.setText(prompt_fields) + prompt_data = metadata.get('Prompts') + self.upper_box.setText(''.join(f"{prompt_data.get(k)}\n" for k in prompt_keys if prompt_data.get(k))) except TypeError as error_log: logger.info("Invalid data in prompt fields %s", f" {type(metadata)} from {file_path}, {metadata} : {error_log}", exc_info=EXC_INFO) except KeyError as error_log: