diff --git a/dpytools/email/ses/client.py b/dpytools/email/ses/client.py index 1b361ba..d978778 100644 --- a/dpytools/email/ses/client.py +++ b/dpytools/email/ses/client.py @@ -30,7 +30,7 @@ def __init__(self, sender: str, aws_region: str): # check sender is actually a valid email try: validated_email = validate_email(sender) - sender = validated_email["email"] + sender = validated_email.normalized except EmailNotValidError as err: raise ValueError(f"Invalid sender email: {err}") @@ -67,7 +67,7 @@ def send(self, recipient: str, subject: str, body: str): # check recipient is actually a valid email try: validated_email = validate_email(recipient) - recipient = validated_email["email"] + recipient = validated_email.normalized except EmailNotValidError as err: raise ValueError(f"Invalid recipient email: {err}") diff --git a/dpytools/http/upload/base_upload.py b/dpytools/http/upload/base_upload.py index cb665d6..ab70a6e 100644 --- a/dpytools/http/upload/base_upload.py +++ b/dpytools/http/upload/base_upload.py @@ -59,11 +59,12 @@ def _upload_new( self, file_path: Union[Path, str], mimetype: str, + chunk_size: Optional[int], alias_name: Optional[str], title: Optional[str], + is_publishable: Optional[bool], license: Optional[str], license_url: Optional[str], - chunk_size: int = 5242880, ) -> None: """ Upload files to the DP Upload Service `upload-new` endpoint. The file to be uploaded (located at `file_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket. The file type should be specified as `mimetype` (e.g. "text/csv" for a CSV file). 
@@ -78,7 +79,14 @@ def _upload_new( # Generate upload request params upload_params = _generate_upload_new_params( - file_path, chunk_size, mimetype, alias_name, title, license, license_url + file_path, + mimetype, + chunk_size, + alias_name, + title, + is_publishable, + license, + license_url, ) logger.info( "Upload parameters generated", data={"upload_params": upload_params} diff --git a/dpytools/http/upload/upload_service_client.py b/dpytools/http/upload/upload_service_client.py index f15ee1d..715dec5 100644 --- a/dpytools/http/upload/upload_service_client.py +++ b/dpytools/http/upload/upload_service_client.py @@ -55,11 +55,12 @@ def upload_json( def upload_new_csv( self, csv_path: Union[Path, str], + chunk_size: Optional[int] = 5242880, + is_publishable: Optional[bool] = False, alias_name: Optional[str] = None, title: Optional[str] = None, license: Optional[str] = None, license_url: Optional[str] = None, - chunk_size: int = 5242880, ) -> None: """ Upload csv files to the DP Upload Service `/upload-new` endpoint. The file to be uploaded (located at `csv_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket. @@ -69,21 +70,23 @@ def upload_new_csv( self._upload_new( csv_path, "text/csv", + chunk_size, alias_name, title, + is_publishable, license, license_url, - chunk_size, ) def upload_new_sdmx( self, sdmx_path: Union[Path, str], + chunk_size: Optional[int] = 5242880, + is_publishable: Optional[bool] = False, alias_name: Optional[str] = None, title: Optional[str] = None, license: Optional[str] = None, license_url: Optional[str] = None, - chunk_size: int = 5242880, ) -> None: """ Upload sdmx files to the DP Upload Service `/upload-new` endpoint. The file to be uploaded (located at `sdmx_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket. 
@@ -93,21 +96,23 @@ def upload_new_sdmx( self._upload_new( sdmx_path, "application/xml", + chunk_size, alias_name, title, + is_publishable, license, license_url, - chunk_size, ) def upload_new_json( self, json_path: Union[Path, str], + chunk_size: Optional[int] = 5242880, + is_publishable: Optional[bool] = False, alias_name: Optional[str] = None, title: Optional[str] = None, license: Optional[str] = None, license_url: Optional[str] = None, - chunk_size: int = 5242880, ) -> None: """ Upload json files to the DP Upload Service `/upload-new` endpoint. The file to be uploaded (located at `json_path`) is chunked (default chunk size 5242880 bytes) and uploaded to an S3 bucket. @@ -115,11 +120,12 @@ def upload_new_json( `alias_name` and `title` are optional arguments. If these are not explicitly provided, `alias_name` will default to the filename with the extension, and `title` will default to the filename without the extension - e.g. if the filename is "data.json", `alias_name` defaults to "data.json" and `title` defaults to "data". 
""" self._upload_new( - json_path, - "application/json", - alias_name, + json_path, + "application/json", + chunk_size, + alias_name, title, + is_publishable, license, - license_url, - chunk_size, + license_url, ) diff --git a/dpytools/http/upload/utils.py b/dpytools/http/upload/utils.py index 5e798d0..af9d462 100644 --- a/dpytools/http/upload/utils.py +++ b/dpytools/http/upload/utils.py @@ -35,13 +35,13 @@ def _generate_upload_params(file_path: Path, mimetype: str, chunk_size: int) -> def _generate_upload_new_params( file_path: Path, - chunk_size: int, mimetype: str, + chunk_size: Optional[int], alias_name: Optional[str], title: Optional[str], - is_publishable: bool = False, - licence: str = "Open Government Licence v3.0", - licence_url: str = "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/", + is_publishable: Optional[bool], + licence: Optional[str], + licence_url: Optional[str], ) -> dict: """ Generate request parameters that do not change when iterating through the list of file chunks. 
@@ -52,7 +52,7 @@ def _generate_upload_new_params( total_size = os.path.getsize(file_path) # Get filename from csv filepath - filename = str(file_path).split("/")[-1] + filename = file_path.name # Get timestamp to create `resumableIdentifier` value in `upload_params` timestamp = datetime.now().strftime("%d%m%y%H%M%S") @@ -60,18 +60,21 @@ def _generate_upload_new_params( # Create identifier from timestamp and filename identifier = f"{timestamp}-{filename.replace('.', '-')}" + # If alias name not provided, default to filename (with extension) if alias_name is None: alias_name = filename + # If title not provided, default to filename (without extension) if title is None: - title = filename.split(".")[0] - + title = file_path.stem + if licence is None: licence = "Open Government Licence v3.0" - - if licence_url is None: - licence_url = "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + if licence_url is None: + licence_url = ( + "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + ) # Generate upload request params upload_params = { @@ -83,15 +86,10 @@ def _generate_upload_new_params( "resumableFilename": filename, "resumableRelativePath": str(file_path), "aliasName": alias_name, - # TODO Currently the POST request in `_upload_file_chunks` is failing due to an potential issue with the Go code (HTTP 500 error: `bad request: unknown error: : duplicate file path`) - # Once the Go issue is resolved, check that the Path is in the correct format - # See https://github.com/ONSdigital/dp-api-clients-go/blob/a26491512a8336ad9c31b694c045d8e3a3ed0578/files/client.go#L160 "Path": f"datasets/{identifier}", "isPublishable": is_publishable, "Title": title, - # `SizeInBytes` may be populated from `resumableTotalSize` - check once `Path` issue has been resolved "SizeInBytes": total_size, - # `Type` may be populated from `resumableType` - check once `Path` issue has been resolved "Type": mimetype, "Licence": licence, "LicenceUrl": 
licence_url, diff --git a/tests/http/test_upload.py b/tests/http/test_upload.py index ba55338..58cdf10 100644 --- a/tests/http/test_upload.py +++ b/tests/http/test_upload.py @@ -43,11 +43,14 @@ def test_generate_upload_new_params_for_csv(): Ensures that _generate_upload_new_params() populates the upload_params dict with the correct values """ upload_params = _generate_upload_new_params( - file_path="tests/test_cases/countries.csv", + file_path=Path("tests/test_cases/countries.csv"), + mimetype="text/csv", chunk_size=5242880, title="title", alias_name="alias-name", - mimetype="text/csv", + is_publishable=False, + licence="My licence", + licence_url="www.example.org/licence", ) assert upload_params["resumableTotalChunks"] == 2 assert upload_params["resumableTotalSize"] == 6198846 @@ -57,15 +60,20 @@ def test_generate_upload_new_params_for_csv(): assert upload_params["resumableRelativePath"] == "tests/test_cases/countries.csv" assert upload_params["aliasName"] == "alias-name" assert upload_params["Title"] == "title" + assert upload_params["Licence"] == "My licence" + assert upload_params["LicenceUrl"] == "www.example.org/licence" def test_generate_new_upload_params_for_sdmx(): upload_params = _generate_upload_new_params( - file_path="tests/test_cases/test.xml", - chunk_size=5242880, + file_path=Path("tests/test_cases/test.xml"), mimetype="application/xml", + chunk_size=5242880, alias_name="alias-name", title="title", + is_publishable=False, + licence="My licence", + licence_url="www.example.org/licence", ) assert upload_params["resumableTotalChunks"] == 1 assert upload_params["resumableTotalSize"] == 3895 @@ -75,3 +83,5 @@ def test_generate_new_upload_params_for_sdmx(): assert upload_params["resumableRelativePath"] == "tests/test_cases/test.xml" assert upload_params["aliasName"] == "alias-name" assert upload_params["Title"] == "title" + assert upload_params["Licence"] == "My licence" + assert upload_params["LicenceUrl"] == "www.example.org/licence"