From 9cbee8261b950d142ab2bdca89f16e1e5cff9d75 Mon Sep 17 00:00:00 2001
From: Dr Griffith Rees
Date: Wed, 11 Oct 2023 01:14:05 +0100
Subject: [PATCH 1/3] feat: add compression options with path fixes

---
 alto2txt2fixture/cli.py                    |  53 ++++---
 alto2txt2fixture/create_adjacent_tables.py |  32 +++--
 alto2txt2fixture/plaintext.py              | 154 ++++++++++++++++++++-
 alto2txt2fixture/utils.py                  | 102 +++++++++++---
 tests/test_cli.py                          |  16 ++-
 tests/test_create_adjacent_tables.py       |   7 +-
 tests/test_utils.py                        |  84 +++++++++++
 7 files changed, 388 insertions(+), 60 deletions(-)

diff --git a/alto2txt2fixture/cli.py b/alto2txt2fixture/cli.py
index 3991fe0..2e4b8d5 100644
--- a/alto2txt2fixture/cli.py
+++ b/alto2txt2fixture/cli.py
@@ -18,8 +18,9 @@
 from .settings import DATA_PROVIDER_INDEX, SETUP_TITLE, settings
 from .types import dotdict
 from .utils import (
+    COMPRESSED_PATH_DEFAULT,
+    COMPRESSION_TYPE_DEFAULT,
     FILE_NAME_0_PADDING_DEFAULT,
-    ZIP_FILE_EXTENSION,
     ArchiveFormatEnum,
     check_newspaper_collection_configuration,
     compress_fixture,
@@ -33,8 +34,6 @@
 
 cli = typer.Typer(pretty_exceptions_show_locals=False)
 
-COMPRESSION_TYPE_DEFAULT: Final[str] = ZIP_FILE_EXTENSION
-COMPRESSED_PATH_DEFAULT: Final[Path] = Path("compressed")
 FILE_RENAME_TABLE_TITLE_DEFAULT: Final[str] = "Current to New File Names"
 
 
@@ -59,6 +58,14 @@ def plaintext(
     digit_padding: Annotated[
         int, typer.Option(help="Padding '0's for indexing json fixture filenames")
     ] = FILE_NAME_0_PADDING_DEFAULT,
+    compress: Annotated[bool, typer.Option(help="Compress json fixtures")] = False,
+    compress_path: Annotated[
+        Path, typer.Option(help="Folder to compress json fixtures to")
+    ] = Path(COMPRESSED_PATH_DEFAULT),
+    compress_format: Annotated[
+        ArchiveFormatEnum,
+        typer.Option(case_sensitive=False, help="Compression format"),
+    ] = COMPRESSION_TYPE_DEFAULT,
 ) -> None:
     """Create a PlainTextFixture and save to `save_path`."""
     plaintext_fixture = PlainTextFixture(
@@ -69,6 +76,8 @@ def plaintext(
         initial_pk=initial_pk,
         max_plaintext_per_fixture_file=records_per_json,
         json_0_file_name_padding=digit_padding,
+        json_export_compression_format=compress_format,
+        json_export_compression_subdir=compress_path,
     )
     plaintext_fixture.info()
     while (
@@ -89,6 +98,8 @@ def plaintext(
         plaintext_fixture.info()
     plaintext_fixture.extract_compressed()
     plaintext_fixture.export_to_json_fixtures()
+    if compress:
+        plaintext_fixture.compress_json_exports()
 
 
 @cli.command()
@@ -97,6 +108,9 @@ def rename(
     folder: Annotated[
         Path, typer.Option(help="Path under `path` for new files")
     ] = Path(),
+    renumber: Annotated[
+        bool, typer.Option(help="Apply file renaming/renumbering")
+    ] = False,
     regex: Annotated[str, typer.Option(help="Regex to filter files")] = "*.txt",
     padding: Annotated[
         int, typer.Option(help="Digits to pad file name")
@@ -107,11 +121,11 @@ def rename(
     compress_format: Annotated[
         ArchiveFormatEnum,
         typer.Option(case_sensitive=False, help="Compression format"),
-    ] = ArchiveFormatEnum.ZIP,
+    ] = COMPRESSION_TYPE_DEFAULT,
     compress_suffix: Annotated[
         str, typer.Option(help="Compressed file name suffix")
     ] = "",
-    compress_subfolder: Annotated[
+    compress_folder: Annotated[
         Path, typer.Option(help="Optional folder to differ from renaming")
     ] = COMPRESSED_PATH_DEFAULT,
     delete_uncompressed: Annotated[
@@ -126,9 +140,8 @@ def rename(
 ) -> None:
     """Manage file names and compression."""
     logger.level = log_level
-    reindex: bool = False or not dry_run
     folder_path: Path = Path(path) / folder
-    compress_path: Path = Path(folder_path) / compress_subfolder
+    compress_path: Path = Path(path) / compress_folder
 
     try:
         paths_dict: dict[os.PathLike, os.PathLike] = glob_path_rename_by_0_padding(
@@ -166,18 +179,18 @@ def rename(
         )
         console.print(config_table)
 
-        file_names_table: Table = file_rename_taple(
+        file_names_table: Table = file_rename_table(
             paths_dict,
             compress_format=compress_format,
             title=FILE_RENAME_TABLE_TITLE_DEFAULT,
             prefix=prefix,
-            reindex=reindex,
+            renumber=renumber,
         )
         console.print(file_names_table)
 
         if dry_run:
             if not force:
-                reindex = Confirm.ask(
+                renumber = Confirm.ask(
                     f"Copy {'and compress ' if compress else ''}"
                     f"{files_count} files "
                     f"from:\n\t'{path}'\nto:\n\t'{folder_path}'\n"
@@ -190,18 +203,18 @@ def rename(
                     f"\n'{compress_path}'\n",
                     default="n",
                 )
-        if reindex:
+        if renumber:
             copy_dict_paths(paths_dict)
         if compress:
             for old_path, new_path in paths_dict.items():
-                file_path: Path = Path(new_path) if reindex else Path(old_path)
+                file_path: Path = Path(new_path) if renumber else Path(old_path)
                 compress_fixture(
                     file_path,
                     output_path=compress_path,
                     suffix=compress_suffix,
                     format=compress_format,
                 )
-                if delete_uncompressed and reindex:
+                if delete_uncompressed and renumber:
                     console.print(f"Deleting {new_path}")
                     Path(new_path).unlink()
 
@@ -347,26 +360,26 @@ def func_table(
     return table
 
 
-def file_rename_taple(
+def file_rename_table(
     paths_dict: dict[os.PathLike, os.PathLike],
-    compress_format: ArchiveFormatEnum = ArchiveFormatEnum.ZIP,
+    compress_format: ArchiveFormatEnum = COMPRESSION_TYPE_DEFAULT,
     title: str = FILE_RENAME_TABLE_TITLE_DEFAULT,
     prefix: str = "",
-    reindex: bool = True,
+    renumber: bool = True,
 ) -> Table:
     """Create a `rich.Table` of rename configuration.
 
     Args:
         paths_dict: dict[os.PathLike, os.PathLike],
-            Original and Reindexed `paths` `dict`
+            Original and renumbered `paths` `dict`
 
         compress_format:
             Which `ArchiveFormatEnum` for compression
 
         title:
             Title of returned `Table`
 
         prefix:
             `str` to add in front of every new path
 
-        reindex:
-            Whether an `int` in each path will be reindexed.
+        renumber:
+            Whether an `int` in each path will be renumbered.
     """
     table: Table = Table(title=title)
@@ -381,6 +394,6 @@ def final_file_name(name: os.PathLike) -> str:
     )
 
     for old_path, new_path in paths_dict.items():
-        name: str = final_file_name(new_path if reindex else old_path)
+        name: str = final_file_name(new_path if renumber else old_path)
         table.add_row(Path(old_path).name, name)
     return table
diff --git a/alto2txt2fixture/create_adjacent_tables.py b/alto2txt2fixture/create_adjacent_tables.py
index 43d7c1a..01b51b0 100755
--- a/alto2txt2fixture/create_adjacent_tables.py
+++ b/alto2txt2fixture/create_adjacent_tables.py
@@ -3,7 +3,7 @@
 from os import PathLike
 from pathlib import Path
 from shutil import rmtree
-from typing import Final, NotRequired, Sequence, TypedDict
+from typing import Any, Final, NotRequired, Sequence, TypedDict
 from urllib.request import urlopen
 
 import numpy as np
@@ -153,30 +153,34 @@ def csv2json_list(
     saved: list[Path] | None = None,
     indent: int = JSON_INDENT,
 ) -> list:
-    """Save `csv_path` as a `json` file and return as a `list`."""
-    json_data = []
-    # See this suggestion for `nan` values: https://stackoverflow.com/a/62691803/678486
-    df = (
+    """Save `csv_path` as a `json` file and return as a `list`.
+
+    Note:
+        Managing `Pandas` `DataFrame` `nan` values via suggestion:
+        https://stackoverflow.com/a/62691803/678486
+
+    """
+    json_data: list[dict[str, Any]] = []
+    df: pd.DataFrame = (
         pd.read_csv(csv_path, index_col=0).fillna(np.nan).replace([np.nan], [None])
-    )  # fillna(None)
+    )
 
     if "political_leanings" in df.columns:
         df["political_leanings"] = df["political_leanings"].apply(json.loads)
     if "prices" in df.columns:
         df["prices"] = df["prices"].apply(json.loads)
 
-    model = Path(csv_path).stem.lower()
+    model: str = Path(csv_path).stem.lower()
 
     for pk, row in df.iterrows():
-        fields = row.to_dict()
+        fields: dict[str, Any] = row.to_dict()
         json_data.append({"pk": pk, "model": model, "fields": fields})
 
-    (Path(output_path) / csv_path).parent.mkdir(parents=True, exist_ok=True)
-    Path(output_path / f"{Path(csv_path).stem}.json").write_text(
-        json.dumps(json_data, indent=indent)
-    )
-    if not saved is None:
-        saved.append(output_path / f"{Path(csv_path).stem}.json")
+    json_path: Path = Path(output_path) / f"{Path(csv_path).stem}.json"
+    json_path.parent.mkdir(parents=True, exist_ok=True)
+    json_path.write_text(json.dumps(json_data, indent=indent))
+    if saved is not None:
+        saved.append(json_path)
 
     return json_data
diff --git a/alto2txt2fixture/plaintext.py b/alto2txt2fixture/plaintext.py
index 702b158..19ed5fa 100644
--- a/alto2txt2fixture/plaintext.py
+++ b/alto2txt2fixture/plaintext.py
@@ -3,6 +3,7 @@
 from logging import getLogger
 from os import PathLike
 from pathlib import Path
+from pprint import pformat
 from shutil import disk_usage, rmtree, unpack_archive
 from typing import Final, Generator, TypedDict
 from zipfile import ZipFile, ZipInfo
@@ -17,11 +18,15 @@
     PlaintextFixtureFieldsDict,
 )
 from .utils import (
+    COMPRESSED_PATH_DEFAULT,
+    COMPRESSION_TYPE_DEFAULT,
     FILE_NAME_0_PADDING_DEFAULT,
     TRUNC_HEADS_PATH_DEFAULT,
     TRUNC_TAILS_PATH_DEFAULT,
     ZIP_FILE_EXTENSION,
+    ArchiveFormatEnum,
     DiskUsageTuple,
+    compress_fixture,
     console,
     free_hd_space_in_GB,
     path_globs_to_tuple,
@@ -197,6 +202,8 @@ class PlainTextFixture:
     export_directory: PathLike = DEFAULT_PLAINTEXT_FIXTURE_OUTPUT
     empty_info_default_str: str = "None"
     json_0_file_name_padding: int = FILE_NAME_0_PADDING_DEFAULT
+    json_export_compression_subdir: PathLike = COMPRESSED_PATH_DEFAULT
+    json_export_compression_format: ArchiveFormatEnum = COMPRESSION_TYPE_DEFAULT
     _trunc_head_paths: int = TRUNC_HEADS_PATH_DEFAULT
     _trunc_tails_paths: int = TRUNC_TAILS_PATH_DEFAULT
     _trunc_tails_sub_paths: int = TRUNC_TAILS_SUBPATH_DEFAULT
@@ -678,10 +685,153 @@ def export_to_json_fixtures(
             max_elements_per_file=self.max_plaintext_per_fixture_file,
             file_name_0_padding=json_0_file_name_padding,
         )
-        self._exported_json_paths = tuple(
-            Path(path) for path in sorted(Path(output_path).glob(f"**/{prefix}*.json"))
+        self.set_exported_json_paths(
+            export_directory=output_path, saved_fixture_prefix=prefix
         )
 
+    @property
+    def exported_json_paths(self) -> Generator[Path, None, None]:
+        """Yield each path in `self._exported_json_paths`, if set.
+
+        Yields:
+            Each path from `self._exported_json_paths`
+
+        Example:
+            ```pycon
+            >>> if is_platform_win:
+            ...     pytest.skip('decompression fails on Windows: issue #55')
+            >>> plaintext_bl_lwm = getfixture('bl_lwm_plaintext_json_export')
+
+            ...
+            >>> tuple(plaintext_bl_lwm.exported_json_paths)
+            (...Path('...plaintext_fixture-000001.json'),)
+
+            ```
+        """
+        if not hasattr(self, "_exported_json_paths"):
+            raise ValueError(
+                f"No '_exported_json_paths', "
+                f"run after 'self.export_to_json_fixtures()' for {self}"
+            )
+        for path in self._exported_json_paths:
+            yield path
+
+    def set_exported_json_paths(
+        self,
+        export_directory: PathLike | None,
+        saved_fixture_prefix: str | None,
+        overwrite: bool = False,
+    ) -> None:
+        """Set `self._exported_json_paths` for use with `self.exported_json_paths`.
+
+        Note:
+            If provided, `export_directory` and `saved_fixture_prefix` will
+            overwrite those attributes on `self`.
+
+        Args:
+            export_directory:
+                `Path` to check for saved `json` files.
+            saved_fixture_prefix:
+                `str` to prefix each exported `json` file with.
+            overwrite:
+                Force replace `self._exported_json_paths` if already set.
+
+        Example:
+            ```pycon
+            >>> if is_platform_win:
+            ...     pytest.skip('decompression fails on Windows: issue #55')
+            >>> tmp_path = getfixture('tmp_path')
+            >>> plaintext_bl_lwm = getfixture('bl_lwm_plaintext_json_export')
+
+            ...
+            >>> tuple(plaintext_bl_lwm.exported_json_paths)
+            (...Path('...plaintext_fixture-000001.json'),)
+            >>> plaintext_bl_lwm.set_exported_json_paths(tmp_path, 'check-prefix')
+            Traceback (most recent call last):
+                ...
+            ValueError: Cannot overwrite 'self._exported_json_paths' without
+            'overwrite' = True. Current 'self._exported_json_paths':
+            (...Path('...plaintext_fixture-000001.json'),)
+            >>> plaintext_bl_lwm.set_exported_json_paths(tmp_path,
+            ...     'check-prefix', overwrite=True)
+
+            ...Force change '._exported_json_paths' in...
+            >>> plaintext_bl_lwm.export_directory == tmp_path
+            True
+            >>> plaintext_bl_lwm.saved_fixture_prefix
+            'check-prefix'
+
+            ```
+        """
+        if hasattr(self, "_exported_json_paths"):
+            if overwrite:
+                logger.info(f"Force change '._exported_json_paths' in {repr(self)}")
+            else:
+                raise ValueError(
+                    f"Cannot overwrite 'self._exported_json_paths' without "
+                    f"'overwrite' = True. Current 'self._exported_json_paths':\n "
+                    f"{pformat(self._exported_json_paths)}"
+                )
+        self.export_directory = (
+            export_directory if export_directory else self.export_directory
+        )
+        self.saved_fixture_prefix = (
+            saved_fixture_prefix if saved_fixture_prefix else self.saved_fixture_prefix
+        )
+        self._exported_json_paths = path_globs_to_tuple(
+            self.export_directory, f"**/{self.saved_fixture_prefix}*.json"
+        )
+
+    def compress_json_exports(
+        self,
+        output_path: PathLike | None = None,
+        format: ArchiveFormatEnum | None = None,
+    ) -> tuple[Path, ...]:
+        """Compress `self._exported_json_paths` to `format`.
+
+        Args:
+            output_path:
+                `Path` to save compressed `json` files to. Uses
+                `self.json_export_compression_subdir` if `None` is passed.
+            format:
+                What compression format to use from `ArchiveFormatEnum`. Uses
+                `self.json_export_compression_format` if `None` is passed.
+
+        Note:
+            Neither `output_path` nor `format` overwrite the related attributes
+            of `self`.
+
+        Returns:
+            A `tuple` of the compressed `json` file `Path`s.
+
+        Example:
+            ```pycon
+            >>> if is_platform_win:
+            ...     pytest.skip('decompression fails on Windows: issue #55')
+            >>> plaintext_bl_lwm = getfixture('bl_lwm_plaintext_json_export')
+
+            ...
+            >>> compressed_paths: tuple[Path, ...] = plaintext_bl_lwm.compress_json_exports(
+            ...     format='tar')
+
+            ...Compressing...'...01.json' to...'tar'...in:...
+            >>> compressed_paths
+            (...Path('.../plaintext_fixture-000001.json.tar'),)
+
+            ```
+        """
+        output_path = (
+            Path(self.json_export_compression_subdir)
+            if not output_path
+            else Path(output_path)
+        )
+        format = self.json_export_compression_format if not format else format
+        compressed_paths: list[Path] = []
+        for json_path in self.exported_json_paths:
+            compressed_paths.append(
+                compress_fixture(json_path, output_path=output_path, format=format)
+            )
+        return tuple(compressed_paths)
+
     # def delete_compressed(self, index: int | str | None = None) -> None:
     def delete_decompressed(self, ignore_errors: bool = True) -> None:
         """Remove all files in `self.extract_path`.
diff --git a/alto2txt2fixture/utils.py b/alto2txt2fixture/utils.py
index c226b07..2a3cd13 100644
--- a/alto2txt2fixture/utils.py
+++ b/alto2txt2fixture/utils.py
@@ -76,7 +76,10 @@
     "ArchiveFormatEnum", tuple(f.upper() for f in ARCHIVE_FORMATS)
 )
 
-ZIP_FILE_EXTENSION: Final[str] = ArchiveFormatEnum.ZIP
+ZIP_FILE_EXTENSION: Final[ArchiveFormatEnum] = ArchiveFormatEnum.ZIP
+
+COMPRESSION_TYPE_DEFAULT: Final[ArchiveFormatEnum] = ZIP_FILE_EXTENSION
+COMPRESSED_PATH_DEFAULT: Final[Path] = Path("compressed")
 
 JSON_FILE_EXTENSION: str = "json"
 JSON_FILE_GLOB_STRING: str = f"**/*{JSON_FILE_EXTENSION}"
@@ -1013,7 +1016,7 @@ def export_fixtures(
 def path_globs_to_tuple(
     path: PathLike, glob_regex_str: str = "*"
 ) -> tuple[PathLike, ...]:
-    """Return `glob` from `path` using `glob_regex_str` as a tuple.
+    """Return a sorted `tuple` of `Path`s in `path` using `glob_regex_str`.
 
     Args:
         path:
@@ -1112,8 +1115,11 @@ def compress_fixture(
     path: PathLike,
     output_path: PathLike | str = settings.OUTPUT,
     suffix: str = "",
-    format: str = ZIP_FILE_EXTENSION,
-) -> None:
+    format: str | ArchiveFormatEnum = ZIP_FILE_EXTENSION,
+    force_overwrite: bool = False,
+    dry_run: bool = False,
+) -> Path:
     """Compress exported `fixtures` files using `make_archive`.
 
     Args:
         path:
+            `Path` of the file or folder to compress.
+        output_path:
             Compressed file name (without extension specified from
             `format`).
 
         format:
-            A `str` of one of the registered compression formats.
-            `Python` provides `zip`, `tar`, `gztar`, `bztar`, and `xztar`
+            A `str` of one of the registered compression formats. By default
+            `Python` provides `zip`, `tar`, `gztar`, `bztar`, and `xztar`.
+            See `ArchiveFormatEnum` for the options checked.
 
         suffix:
             `str` to add to compressed file name saved.
 
     Example:
         ```pycon
-        >>> tmp_path: Path = getfixture("tmp_path")
         >>> plaintext_bl_lwm = getfixture('bl_lwm_plaintext_json_export')
-
-        ...Compressed configs...%...[...]
-        >>> compress_fixture(
-        ...     path=plaintext_bl_lwm._exported_json_paths[0],
-        ...     output_path=tmp_path)
+
+        ...
+        >>> tmp_path = getfixture('tmp_path')
+        >>> json_path: Path = next(plaintext_bl_lwm.exported_json_paths)
+        >>> assert 'pytest-of' in str(json_path)
+        >>> compressed_path: Path = compress_fixture(path=json_path,
+        ...                                          output_path=tmp_path,
+        ...                                          dry_run=True)
+
+        ...creating...'...plain...-...01.json.zip...'...addin...
+        ...'plain...01.json'...to...it...
+        >>> compressed_path.exists()
+        False
+        >>> compressed_path: Path = compress_fixture(path=json_path,
+        ...                                          output_path=tmp_path,
+        ...                                          dry_run=False)
 
-        ...Compressing...'...plain...-...01.json...'...to 'zip'...
-        ...in:...'...com...'...
+        ...creating...'...plain...-...01.json.zip...'...addin...
+        ...'plain...01.json'...to...it...
         >>> from zipfile import ZipFile, ZipInfo
         >>> zipfile_info_list: list[ZipInfo] = ZipFile(
         ...     tmp_path / 'plaintext_fixture-000001.json.zip'
         ... ).infolist()
         >>> len(zipfile_info_list)
         1
         ...
 
         ```
     """
-    if format not in ARCHIVE_FORMATS:
-        raise ValueError(
-            f"format '{format}' not valid, "
-            f"options are:'\n{pformat(ARCHIVE_FORMATS)}"
-        )
+    path = Path(path)
+    current_dir: Path = Path.cwd()  # record cwd to restore after `chdir` below
+    root_dir: str | None = None
+    base_dir: str | None = None
+    if not path.exists():
+        raise ValueError(f"Cannot compress non-existent 'path': {path}")
+    if isinstance(format, str):
+        try:
+            format = ArchiveFormatEnum(format)
+        except ValueError:
+            raise ValueError(
+                f"format '{format}' not valid, "
+                f"options are:\n{pformat(ARCHIVE_FORMATS)}"
+            )
+    chdir(str(path.parent))
+
+    if path.is_file():
+        root_dir = str(path.parent)
+        base_dir = path.name
+    elif path.is_dir():
+        root_dir = path.name
+    else:
+        raise ValueError(f"'path' is neither a file nor a directory: {path}")
+
     save_file_name: Path = Path(Path(path).stem + suffix + "".join(Path(path).suffixes))
     save_path: Path = Path(output_path) / save_file_name
+    if Path(str(save_path) + f".{format}").exists():
+        error_message: str = f"Path to save to already exists: '{save_path}'"
+        if force_overwrite:
+            logger.warning(error_message)
+            logger.warning(f"Overwriting '{save_path}'")
+        else:
+            raise ValueError(error_message)
     logger.info(f"Compressing '{path}' to '{format}' in: '{save_path.parent}'")
-    make_archive(str(save_path), format=format, base_dir=path)
+    archive_path: Path = Path(
+        make_archive(
+            base_name=str(save_path),
+            format=str(format),
+            root_dir=root_dir,
+            base_dir=base_dir,
+            dry_run=dry_run,
+            logger=logger,
+        )
+    )
+    chdir(current_dir)
+    return archive_path
 
 
 def paths_with_newlines(
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 27ccab9..98f8f74 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -97,6 +97,7 @@ def test_rename_cli(
             str(output_path),
             "--regex",
             "*.txt",
+            "--renumber",
             run_type,
         ],
         input=input,
@@ -126,7 +127,20 @@ def test_rename_compress(
     for path in tmp_json_fixtures:
         assert path.is_file()
     rename(tmp_path, compress=True, force=True)
-    stdout: list[str] = capsys.readouterr().out
+    stdout: str = capsys.readouterr().out
+    info_txts: tuple[str, ...] = (
+        "compress_format",
+        "zip",
+        "compress_suffix",
+        "''",
+        "compress_folder",
+        "compressed",
+    )
+    for text in info_txts:
+        assert text in stdout
+    for path in tmp_json_fixtures:
+        assert path.name in stdout
+        assert path.name + ".zip" in stdout
     for path in tmp_json_fixtures:
         zip_path: Path = path.parent / COMPRESSED_PATH_DEFAULT / (path.name + ".zip")
         assert zip_path.is_file()
diff --git a/tests/test_create_adjacent_tables.py b/tests/test_create_adjacent_tables.py
index 6e14c15..4146b4b 100644
--- a/tests/test_create_adjacent_tables.py
+++ b/tests/test_create_adjacent_tables.py
@@ -9,7 +9,6 @@
     GAZETTEER_OUT_FILENAMES,
     ISSUE,
     MITCHELLS_OUT_FILENAMES,
-    OUTPUT,
     RemoteDataFilesType,
     TableOutputConfigType,
     csv2json_list,
@@ -56,8 +55,8 @@ def test_local_result_paths(adjacent_data_run_results) -> None:
         MITCHELLS_OUT_FILENAMES | GAZETTEER_OUT_FILENAMES
     )
     for paths_dict in all_outfiles.values():
-        assert Path(OUTPUT / paths_dict["csv"]).is_file()
-        assert Path(OUTPUT / paths_dict["json"]).is_file()
+        assert Path(adjacent_data_run_results / paths_dict["csv"]).is_file()
+        assert Path(adjacent_data_run_results / paths_dict["json"]).is_file()
 
 
 @pytest.mark.download
@@ -65,7 +64,7 @@ def test_csv2json_list(adjacent_data_run_results) -> None:
     """Test converting a `csv` file to `json` `Django` `fixture`."""
     test_mitchells_write_folder: Path = Path("test_mitchells")
     mitchells_issue_csv_path: Path = (
-        Path(OUTPUT) / MITCHELLS_OUT_FILENAMES[ISSUE]["csv"]
+        adjacent_data_run_results / MITCHELLS_OUT_FILENAMES[ISSUE]["csv"]
     )
     mitchells_issue_df: DataFrame = read_csv(mitchells_issue_csv_path)
     mitchells_out: list = csv2json_list(
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2a36d36..361c862 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,4 +1,6 @@
+from logging import DEBUG
 from pathlib import Path, PureWindowsPath
+from zipfile import ZipFile, ZipInfo
 
 import pytest
 
@@ -8,8 +10,11 @@
     TableOutputConfigType,
     download_data,
 )
+from alto2txt2fixture.plaintext import PlainTextFixture
 from alto2txt2fixture.utils import (
+    ArchiveFormatEnum,
     check_newspaper_collection_configuration,
+    compress_fixture,
     truncate_path_str,
 )
 
@@ -94,3 +99,82 @@ def test_windows_root_path_truncate(
         _force_type=PureWindowsPath,
     )
     assert short_root == correct_win_path_trunc_str
+
+
+@pytest.mark.parametrize(
+    "compress_files_count, compress_type", ((1, "zip"), (2, "zip"), (1, "tar"))
+)
+def test_compress_fixtures(
+    tmp_path: Path,
+    bl_lwm_plaintext_json_export: PlainTextFixture,
+    compress_files_count: int,
+    compress_type: str,
+    caplog,
+) -> None:
+    """Test compressing one or more files."""
+    caplog.set_level(DEBUG)
+    compressed_extension: str = f".{ArchiveFormatEnum(compress_type)}"
+    multiple_files_path: Path = Path(f"multiple-files-to-{compress_type}")
+    uncompressed_json: str = "plaintext_fixture-000001.json"
+    compressed_json_filename: str = uncompressed_json + compressed_extension
+    path_to_compress: Path
+    compressed_path: Path
+    json_path: Path = next(bl_lwm_plaintext_json_export.exported_json_paths)
+    files_to_compress: tuple[Path, ...]
+    create_log_msg: str
+
+    assert "pytest-of" in str(json_path)
+
+    if compress_files_count == 1:
+        path_to_compress = json_path
+    else:
+        path_to_compress = json_path.parent / multiple_files_path
+        path_to_compress.mkdir(exist_ok=True)
+        json_path.rename(path_to_compress / json_path.name)
+        for i in range(compress_files_count - 1):
+            (path_to_compress / f"test_file_{i}").touch()
+        files_to_compress = tuple(path_to_compress.iterdir())
+        assert len(files_to_compress) == compress_files_count
+
+    if compress_type == ArchiveFormatEnum.ZIP:
+        create_log_msg = (
+            f"creating '{path_to_compress}{compressed_extension}' and adding "
+        )
+        if compress_files_count == 1:
+            create_log_msg += f"'{path_to_compress.name}' to it"
+        else:
+            create_log_msg += "'.' to it"
+    else:
+        create_log_msg = f"Creating {compress_type} archive"
+
+    compressed_path = compress_fixture(
+        path=path_to_compress, output_path=tmp_path, dry_run=True, format=compress_type
+    )
+
+    assert caplog.messages[1] == create_log_msg
+
+    compressed_path = compress_fixture(
+        path=path_to_compress,
+        output_path=tmp_path,
+        dry_run=False,
+        format=compress_type,
+    )
+
+    assert compressed_path.stem == path_to_compress.name
+    assert compressed_path.parent == json_path.parent
+    assert compressed_path.suffix == compressed_extension
+    if compress_files_count == 1:
+        assert compressed_path.name == compressed_json_filename
+        assert compressed_path.stem == json_path.name
+        assert json_path.is_file()
+    else:
+        assert compressed_path.stem == str(multiple_files_path)
+        assert not json_path.is_file()
+    if compress_type == ArchiveFormatEnum.ZIP:
+        zipfile_info_list: list[ZipInfo] = ZipFile(compressed_path).infolist()
+        assert len(zipfile_info_list) == compress_files_count
+        json_file_index: int = 0 if compress_files_count == 1 else -1
+        assert (
+            Path(zipfile_info_list[json_file_index].filename).name == uncompressed_json
+        )

From 57d68a871ce4a87545aad4544ba19d5a6109e096 Mon Sep 17 00:00:00 2001
From: Dr Griffith Rees
Date: Wed, 11 Oct 2023 01:30:50 +0100
Subject: [PATCH 2/3] fix(test): tweak caplog `test_compress_fixtures` test for
 macOS GitHub

---
 tests/test_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 361c862..37c9da8 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -152,7 +152,8 @@ def test_compress_fixtures(
         path=path_to_compress, output_path=tmp_path, dry_run=True, format=compress_type
     )
 
-    assert caplog.messages[1] == create_log_msg
+    if len(caplog.messages) == 2:  # GitHub Actions macOS runners may emit only 1 log
+        assert caplog.messages[1] == create_log_msg
 
     compressed_path = compress_fixture(
         path=path_to_compress,

From d2da3605ad9f5f1d779eb3f1a436d057f4ccdf6e Mon Sep 17 00:00:00 2001
From: Dr Griffith Rees
Date: Wed, 11 Oct 2023 01:47:08 +0100
Subject: [PATCH 3/3] fix(test): skip ordering issues for compression with
 windows

---
 tests/test_utils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 37c9da8..3b89249 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -109,6 +109,7 @@ def test_compress_fixtures(
     bl_lwm_plaintext_json_export: PlainTextFixture,
     compress_files_count: int,
     compress_type: str,
+    is_platform_win: bool,
     caplog,
 ) -> None:
     """Test compressing one or more files."""
@@ -176,6 +177,8 @@ def test_compress_fixtures(
         zipfile_info_list: list[ZipInfo] = ZipFile(compressed_path).infolist()
         assert len(zipfile_info_list) == compress_files_count
         json_file_index: int = 0 if compress_files_count == 1 else -1
-        assert (
-            Path(zipfile_info_list[json_file_index].filename).name == uncompressed_json
-        )
+        if not is_platform_win:  # compression ordering differs
+            assert (
+                Path(zipfile_info_list[json_file_index].filename).name
+                == uncompressed_json
+            )
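
Reviewer note: the sketch below is not part of the patch series. It is a minimal end-to-end illustration of how the pieces added above fit together, using only the signatures the patches introduce (`compress_json_exports`, `exported_json_paths`, `compress_fixture` with `format`/`dry_run`/`force_overwrite`, and `ArchiveFormatEnum`). The `path=` keyword to `PlainTextFixture` and all folder names are assumptions for illustration, not confirmed by the diff.

```python
# Hypothetical usage sketch of the compression options added in PATCH 1/3.
# Assumptions: `PlainTextFixture` accepts a `path=` argument for the alto2txt
# plaintext folder (inferred from the CLI wiring), and the folders below exist.
from pathlib import Path

from alto2txt2fixture.plaintext import PlainTextFixture
from alto2txt2fixture.utils import ArchiveFormatEnum, compress_fixture

fixture = PlainTextFixture(
    path=Path("plaintext-export"),  # assumed: folder of alto2txt output
    json_export_compression_format=ArchiveFormatEnum.TAR,
    json_export_compression_subdir=Path("compressed"),
)
fixture.extract_compressed()       # unpack any zipped plaintext inputs
fixture.export_to_json_fixtures()  # writes plaintext_fixture-000001.json, ...

# Compress every exported fixture; returns the archive paths as a tuple.
archives: tuple[Path, ...] = fixture.compress_json_exports()

# Or compress one file directly; `dry_run=True` only logs the `make_archive`
# call that would run, without writing the archive to disk.
archive: Path = compress_fixture(
    Path("fixtures/plaintext_fixture-000001.json"),
    output_path=Path("compressed"),
    format="tar",
    dry_run=True,
)
```

Per the docstrings above, `compress_json_exports` raises `ValueError` if `export_to_json_fixtures` has not run first, and `compress_fixture` refuses to overwrite an existing archive unless `force_overwrite=True`.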