Skip to content

Commit

Permalink
Merge pull request #66 from Living-with-machines/fix-rename-cli
Browse files Browse the repository at this point in the history
feat: add compression options and fix some path issues
  • Loading branch information
griff-rees authored Oct 11, 2023
2 parents 1f34019 + d2da360 commit bad89bd
Show file tree
Hide file tree
Showing 7 changed files with 392 additions and 60 deletions.
53 changes: 33 additions & 20 deletions alto2txt2fixture/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
from .settings import DATA_PROVIDER_INDEX, SETUP_TITLE, settings
from .types import dotdict
from .utils import (
COMPRESSED_PATH_DEFAULT,
COMPRESSION_TYPE_DEFAULT,
FILE_NAME_0_PADDING_DEFAULT,
ZIP_FILE_EXTENSION,
ArchiveFormatEnum,
check_newspaper_collection_configuration,
compress_fixture,
Expand All @@ -33,8 +34,6 @@

cli = typer.Typer(pretty_exceptions_show_locals=False)

COMPRESSION_TYPE_DEFAULT: Final[str] = ZIP_FILE_EXTENSION
COMPRESSED_PATH_DEFAULT: Final[Path] = Path("compressed")
FILE_RENAME_TABLE_TITLE_DEFAULT: Final[str] = "Current to New File Names"


Expand All @@ -59,6 +58,14 @@ def plaintext(
digit_padding: Annotated[
int, typer.Option(help="Padding '0's for indexing json fixture filenames")
] = FILE_NAME_0_PADDING_DEFAULT,
compress: Annotated[bool, typer.Option(help="Compress json fixtures")] = False,
compress_path: Annotated[
Path, typer.Option(help="Folder to compress json fixtures to")
] = Path(COMPRESSED_PATH_DEFAULT),
compress_format: Annotated[
ArchiveFormatEnum,
typer.Option(case_sensitive=False, help="Compression format"),
] = COMPRESSION_TYPE_DEFAULT,
) -> None:
"""Create a PlainTextFixture and save to `save_path`."""
plaintext_fixture = PlainTextFixture(
Expand All @@ -69,6 +76,8 @@ def plaintext(
initial_pk=initial_pk,
max_plaintext_per_fixture_file=records_per_json,
json_0_file_name_padding=digit_padding,
json_export_compression_format=compress_format,
json_export_compression_subdir=compress_path,
)
plaintext_fixture.info()
while (
Expand All @@ -89,6 +98,8 @@ def plaintext(
plaintext_fixture.info()
plaintext_fixture.extract_compressed()
plaintext_fixture.export_to_json_fixtures()
if compress:
plaintext_fixture.compress_json_exports()


@cli.command()
Expand All @@ -97,6 +108,9 @@ def rename(
folder: Annotated[
Path, typer.Option(help="Path under `path` for new files")
] = Path(),
renumber: Annotated[
bool, typer.Option(help="Show changes without applying")
] = False,
regex: Annotated[str, typer.Option(help="Regex to filter files")] = "*.txt",
padding: Annotated[
int, typer.Option(help="Digits to pad file name")
Expand All @@ -107,11 +121,11 @@ def rename(
compress_format: Annotated[
ArchiveFormatEnum,
typer.Option(case_sensitive=False, help="Compression format"),
] = ArchiveFormatEnum.ZIP,
] = COMPRESSION_TYPE_DEFAULT,
compress_suffix: Annotated[
str, typer.Option(help="Compressed file name suffix")
] = "",
compress_subfolder: Annotated[
compress_folder: Annotated[
Path, typer.Option(help="Optional folder to differ from renaming")
] = COMPRESSED_PATH_DEFAULT,
delete_uncompressed: Annotated[
Expand All @@ -126,9 +140,8 @@ def rename(
) -> None:
"""Manage file names and compression."""
logger.level = log_level
reindex: bool = False or not dry_run
folder_path: Path = Path(path) / folder
compress_path: Path = Path(folder_path) / compress_subfolder
compress_path: Path = Path(path) / compress_folder

try:
paths_dict: dict[os.PathLike, os.PathLike] = glob_path_rename_by_0_padding(
Expand Down Expand Up @@ -166,18 +179,18 @@ def rename(
)
console.print(config_table)

file_names_table: Table = file_rename_taple(
file_names_table: Table = file_rename_table(
paths_dict,
compress_format=compress_format,
title=FILE_RENAME_TABLE_TITLE_DEFAULT,
prefix=prefix,
reindex=reindex,
renumber=renumber,
)
console.print(file_names_table)

if dry_run:
if not force:
reindex = Confirm.ask(
renumber = Confirm.ask(
f"Copy {'and compress ' if compress else ''}"
f"{files_count} files "
f"from:\n\t'{path}'\nto:\n\t'{folder_path}'\n"
Expand All @@ -190,18 +203,18 @@ def rename(
f"\n'{compress_path}'\n",
default="n",
)
if reindex:
if renumber:
copy_dict_paths(paths_dict)
if compress:
for old_path, new_path in paths_dict.items():
file_path: Path = Path(new_path) if reindex else Path(old_path)
file_path: Path = Path(new_path) if renumber else Path(old_path)
compress_fixture(
file_path,
output_path=compress_path,
suffix=compress_suffix,
format=compress_format,
)
if delete_uncompressed and reindex:
if delete_uncompressed and renumber:
console.print(f"Deleting {new_path}")
Path(new_path).unlink()

Expand Down Expand Up @@ -347,26 +360,26 @@ def func_table(
return table


def file_rename_taple(
def file_rename_table(
paths_dict: dict[os.PathLike, os.PathLike],
compress_format: ArchiveFormatEnum = ArchiveFormatEnum.ZIP,
compress_format: ArchiveFormatEnum = COMPRESSION_TYPE_DEFAULT,
title: str = FILE_RENAME_TABLE_TITLE_DEFAULT,
prefix: str = "",
reindex: bool = True,
renumber: bool = True,
) -> Table:
"""Create a `rich.Table` of rename configuration.
Args:
paths_dict: dict[os.PathLike, os.PathLike],
Original and Reindexed `paths` `dict`
Original and renumbered `paths` `dict`
compress_format:
Which `ArchiveFormatEnum` for compression
title:
Title of returned `Table`
prefix:
`str` to add in front of every new path
reindex:
Whether an `int` in each path will be reindexed.
renumber:
Whether an `int` in each path will be renumbered.
"""
table: Table = Table(title=title)
Expand All @@ -381,6 +394,6 @@ def final_file_name(name: os.PathLike) -> str:
)

for old_path, new_path in paths_dict.items():
name: str = final_file_name(new_path if reindex else old_path)
name: str = final_file_name(new_path if renumber else old_path)
table.add_row(Path(old_path).name, name)
return table
32 changes: 18 additions & 14 deletions alto2txt2fixture/create_adjacent_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from os import PathLike
from pathlib import Path
from shutil import rmtree
from typing import Final, NotRequired, Sequence, TypedDict
from typing import Any, Final, NotRequired, Sequence, TypedDict
from urllib.request import urlopen

import numpy as np
Expand Down Expand Up @@ -153,30 +153,34 @@ def csv2json_list(
saved: list[Path] | None = None,
indent: int = JSON_INDENT,
) -> list:
"""Save `csv_path` as a `json` file and return as a `list`."""
json_data = []
# See this suggestion for `nan` values: https://stackoverflow.com/a/62691803/678486
df = (
"""Save `csv_path` as a `json` file and return as a `list`.
Note:
Managing `Pandas` `DataFrame` `nan` values via suggestion:
https://stackoverflow.com/a/62691803/678486
"""
json_data: list[dict[str, Any]] = []
df: pd.DataFrame = (
pd.read_csv(csv_path, index_col=0).fillna(np.nan).replace([np.nan], [None])
) # fillna(None)
)

if "political_leanings" in df.columns:
df["political_leanings"] = df["political_leanings"].apply(json.loads)
if "prices" in df.columns:
df["prices"] = df["prices"].apply(json.loads)

model = Path(csv_path).stem.lower()
model: str = Path(csv_path).stem.lower()

for pk, row in df.iterrows():
fields = row.to_dict()
fields: dict[str, Any] = row.to_dict()
json_data.append({"pk": pk, "model": model, "fields": fields})

(Path(output_path) / csv_path).parent.mkdir(parents=True, exist_ok=True)
Path(output_path / f"{Path(csv_path).stem}.json").write_text(
json.dumps(json_data, indent=indent)
)
if not saved is None:
saved.append(output_path / f"{Path(csv_path).stem}.json")
json_path: Path = Path(output_path) / f"{Path(csv_path).stem}.json"
json_path.parent.mkdir(parents=True, exist_ok=True)
json_path.write_text(json.dumps(json_data, indent=indent))
if saved and isinstance(saved, list):
saved.append(json_path)
return json_data


Expand Down
Loading

0 comments on commit bad89bd

Please sign in to comment.