Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set default value of False for is_publishable #62

Merged
merged 1 commit into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions dpytools/email/ses/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def __init__(self, sender: str, aws_region: str):
# check sender is actually a valid email
try:
validated_email = validate_email(sender)
sender = validated_email["email"]
sender = validated_email.normalized
print(sender)
except EmailNotValidError as err:
raise ValueError(f"Invalid sender email: {err}")

Expand Down Expand Up @@ -67,7 +68,7 @@ def send(self, recipient: str, subject: str, body: str):
# check recipient is actually a valid email
try:
validated_email = validate_email(recipient)
recipient = validated_email["email"]
recipient = validated_email.normalized
except EmailNotValidError as err:
raise ValueError(f"Invalid recipient email: {err}")

Expand Down
12 changes: 10 additions & 2 deletions dpytools/http/upload/base_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,12 @@ def _upload_new(
self,
file_path: Union[Path, str],
mimetype: str,
chunk_size: Optional[int],
alias_name: Optional[str],
title: Optional[str],
is_publishable: Optional[bool],
license: Optional[str],
license_url: Optional[str],
chunk_size: int = 5242880,
) -> None:
"""
Upload files to the DP Upload Service `upload-new` endpoint. The file to be uploaded (located at `file_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket. The file type should be specified as `mimetype` (e.g. "text/csv" for a CSV file).
Expand All @@ -78,7 +79,14 @@ def _upload_new(

# Generate upload request params
upload_params = _generate_upload_new_params(
file_path, chunk_size, mimetype, alias_name, title, license, license_url
file_path,
mimetype,
chunk_size,
alias_name,
title,
is_publishable,
license,
license_url,
)
logger.info(
"Upload parameters generated", data={"upload_params": upload_params}
Expand Down
26 changes: 16 additions & 10 deletions dpytools/http/upload/upload_service_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,12 @@ def upload_json(
def upload_new_csv(
self,
csv_path: Union[Path, str],
chunk_size: Optional[int] = 5242880,
is_publishable: Optional[bool] = False,
alias_name: Optional[str] = None,
title: Optional[str] = None,
license: Optional[str] = None,
license_url: Optional[str] = None,
chunk_size: int = 5242880,
) -> None:
"""
Upload csv files to the DP Upload Service `/upload-new` endpoint. The file to be uploaded (located at `csv_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket.
Expand All @@ -69,21 +70,23 @@ def upload_new_csv(
self._upload_new(
csv_path,
"text/csv",
chunk_size,
alias_name,
title,
is_publishable,
license,
license_url,
chunk_size,
)

def upload_new_sdmx(
self,
sdmx_path: Union[Path, str],
chunk_size: Optional[int] = 5242880,
is_publishable: Optional[bool] = False,
alias_name: Optional[str] = None,
title: Optional[str] = None,
license: Optional[str] = None,
license_url: Optional[str] = None,
chunk_size: int = 5242880,
) -> None:
"""
Upload sdmx files to the DP Upload Service `/upload-new` endpoint. The file to be uploaded (located at `sdmx_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket.
Expand All @@ -93,33 +96,36 @@ def upload_new_sdmx(
self._upload_new(
sdmx_path,
"application/xml",
chunk_size,
alias_name,
title,
is_publishable,
license,
license_url,
chunk_size,
)

def upload_new_json(
self,
json_path: Union[Path, str],
chunk_size: Optional[int] = 5242880,
is_publishable: Optional[bool] = False,
alias_name: Optional[str] = None,
title: Optional[str] = None,
license: Optional[str] = None,
license_url: Optional[str] = None,
chunk_size: int = 5242880,
) -> None:
"""
Upload json files to the DP Upload Service `/upload-new` endpoint. The file to be uploaded (located at `json_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket.

`alias_name` and `title` are optional arguments. If these are not explicitly provided, `alias_name` will default to the filename with the extension, and `title` will default to the filename without the extension - e.g. if the filename is "data.json", `alias_name` defaults to "data.json" and `title` defaults to "data".
"""
self._upload_new(
json_path,
"application/json",
alias_name,
json_path,
"application/json",
chunk_size,
alias_name,
title,
is_publishable,
license,
license_url,
chunk_size,
license_url,
)
28 changes: 13 additions & 15 deletions dpytools/http/upload/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ def _generate_upload_params(file_path: Path, mimetype: str, chunk_size: int) ->

def _generate_upload_new_params(
file_path: Path,
chunk_size: int,
mimetype: str,
chunk_size: Optional[int],
alias_name: Optional[str],
title: Optional[str],
is_publishable: bool = False,
licence: str = "Open Government Licence v3.0",
licence_url: str = "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/",
is_publishable: Optional[bool],
licence: Optional[str],
licence_url: Optional[str],
) -> dict:
"""
Generate request parameters that do not change when iterating through the list of file chunks.
Expand All @@ -52,26 +52,29 @@ def _generate_upload_new_params(
total_size = os.path.getsize(file_path)

# Get filename from csv filepath
filename = str(file_path).split("/")[-1]
filename = file_path.name

# Get timestamp to create `resumableIdentifier` value in `upload_params`
timestamp = datetime.now().strftime("%d%m%y%H%M%S")

# Create identifier from timestamp and filename
identifier = f"{timestamp}-{filename.replace('.', '-')}"

# If alias name not provided, default to filename (with extension)
if alias_name is None:
alias_name = filename

# If title not provided, default to filename (without extension)
if title is None:
title = filename.split(".")[0]
title = file_path.stem

if licence is None:
licence = "Open Government Licence v3.0"

if licence_url is None:
licence_url = "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"

if licence_url is None:
licence_url = (
"http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
)

# Generate upload request params
upload_params = {
Expand All @@ -83,15 +86,10 @@ def _generate_upload_new_params(
"resumableFilename": filename,
"resumableRelativePath": str(file_path),
"aliasName": alias_name,
# TODO Currently the POST request in `_upload_file_chunks` is failing due to a potential issue with the Go code (HTTP 500 error: `bad request: unknown error: : duplicate file path`)
# Once the Go issue is resolved, check that the Path is in the correct format
# See https://github.com/ONSdigital/dp-api-clients-go/blob/a26491512a8336ad9c31b694c045d8e3a3ed0578/files/client.go#L160
"Path": f"datasets/{identifier}",
"isPublishable": is_publishable,
"Title": title,
# `SizeInBytes` may be populated from `resumableTotalSize` - check once `Path` issue has been resolved
"SizeInBytes": total_size,
# `Type` may be populated from `resumableType` - check once `Path` issue has been resolved
"Type": mimetype,
"Licence": licence,
"LicenceUrl": licence_url,
Expand Down
18 changes: 14 additions & 4 deletions tests/http/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,14 @@ def test_generate_upload_new_params_for_csv():
Ensures that _generate_upload_new_params() populates the upload_params dict with the correct values
"""
upload_params = _generate_upload_new_params(
file_path="tests/test_cases/countries.csv",
file_path=Path("tests/test_cases/countries.csv"),
mimetype="text/csv",
chunk_size=5242880,
title="title",
alias_name="alias-name",
mimetype="text/csv",
is_publishable=False,
licence="My licence",
licence_url="www.example.org/licence",
)
assert upload_params["resumableTotalChunks"] == 2
assert upload_params["resumableTotalSize"] == 6198846
Expand All @@ -57,15 +60,20 @@ def test_generate_upload_new_params_for_csv():
assert upload_params["resumableRelativePath"] == "tests/test_cases/countries.csv"
assert upload_params["aliasName"] == "alias-name"
assert upload_params["Title"] == "title"
assert upload_params["Licence"] == "My licence"
assert upload_params["LicenceUrl"] == "www.example.org/licence"


def test_generate_new_upload_params_for_sdmx():
upload_params = _generate_upload_new_params(
file_path="tests/test_cases/test.xml",
chunk_size=5242880,
file_path=Path("tests/test_cases/test.xml"),
mimetype="application/xml",
chunk_size=5242880,
alias_name="alias-name",
title="title",
is_publishable=False,
licence="My licence",
licence_url="www.example.org/licence",
)
assert upload_params["resumableTotalChunks"] == 1
assert upload_params["resumableTotalSize"] == 3895
Expand All @@ -75,3 +83,5 @@ def test_generate_new_upload_params_for_sdmx():
assert upload_params["resumableRelativePath"] == "tests/test_cases/test.xml"
assert upload_params["aliasName"] == "alias-name"
assert upload_params["Title"] == "title"
assert upload_params["Licence"] == "My licence"
assert upload_params["LicenceUrl"] == "www.example.org/licence"
Loading