Skip to content

Commit

Permalink
Feat/MET-5305 Accept .tar and .tar.gz uploads harvesting (#130)
Browse files Browse the repository at this point in the history
* MET-5305 Updating code to accept other file harvesting

* MET-5305 Fixed unit tests

* MET-5305 Changed Step enum and fixed unit tests

* MET-5305 Fixed issue of harvesting other files

* MET-5305 Fixed http harvesting to accept .tar and .tar.gz files

* MET-5303 Code refactoring

* MET-5305 Created unit tests and fixed code review comments

* MET-5305 Changed unit tests
  • Loading branch information
JoanaCMS authored Jun 1, 2023
1 parent ae1cbee commit a2983b0
Show file tree
Hide file tree
Showing 31 changed files with 354 additions and 230 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
<properties>
<version.java>11</version.java>
<version.springdoc>1.7.0</version.springdoc>
<version.metis>10</version.metis>
<version.metis>11-SNAPSHOT</version.metis>
<version.europeana>2.15.3</version.europeana>
<version.postgresql>42.6.0</version.postgresql>
<version.aws.s3>1.12.261</version.aws.s3>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/eu/europeana/metis/sandbox/common/Step.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*/
public enum Step {
HARVEST_OAI_PMH("harvest OAI-PMH",1),
HARVEST_ZIP("harvest zip", 2),
HARVEST_FILE("harvest file", 2),
TRANSFORM_TO_EDM_EXTERNAL("transform to EDM external", 3),
VALIDATE_EXTERNAL("validate (edm external)", 4),
TRANSFORM("transform", 5),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package eu.europeana.metis.sandbox.common.exception;

/**
 * Exception class for when an invalid type of compressed file is detected.
 * <p>The exception message starts with a fixed sentence and, when the cause carries a message of
 * its own, is followed by that message so that code reporting only {@link #getMessage()} still
 * conveys the specific reason.</p>
 */
public class InvalidCompressedFileException extends RuntimeException {

  private static final long serialVersionUID = -2555540887797325483L;

  private static final String BASE_MESSAGE = "File provided is not valid compressed file.";

  /**
   * Creates the exception.
   *
   * @param cause the underlying problem; may be {@code null}, in which case only the fixed
   * sentence is used as message
   */
  public InvalidCompressedFileException(Throwable cause) {
    super(buildMessage(cause), cause);
  }

  /**
   * Builds the exception message: the fixed sentence, followed by the cause's message if there is
   * one.
   */
  private static String buildMessage(Throwable cause) {
    if (cause == null) {
      return BASE_MESSAGE;
    }
    final String detail = cause.getMessage();
    if (detail == null || detail.isBlank()) {
      return BASE_MESSAGE;
    }
    return BASE_MESSAGE + " " + detail;
  }

}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import eu.europeana.indexing.tiers.view.RecordTierCalculationView;
import eu.europeana.metis.sandbox.common.OaiHarvestData;
import eu.europeana.metis.sandbox.common.Step;
import eu.europeana.metis.sandbox.common.exception.InvalidCompressedFileException;
import eu.europeana.metis.sandbox.common.exception.NoRecordFoundException;
import eu.europeana.metis.sandbox.common.exception.XsltProcessingException;
import eu.europeana.metis.sandbox.common.locale.Country;
Expand All @@ -18,13 +19,15 @@
import eu.europeana.metis.sandbox.service.record.RecordLogService;
import eu.europeana.metis.sandbox.service.record.RecordTierCalculationService;
import eu.europeana.metis.sandbox.service.workflow.HarvestPublishService;
import eu.europeana.metis.utils.CompressedFileExtension;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Content;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.parameters.RequestBody;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.springframework.http.HttpStatus;
import org.springframework.web.bind.annotation.GetMapping;
Expand All @@ -40,6 +43,10 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -116,13 +123,13 @@ public DatasetController(DatasetService datasetService, DatasetLogService datase
}

/**
* POST API calls for harvesting and processing the records given a zip file
* POST API calls for harvesting and processing the records given a zip, tar or tar.gz file
*
* @param datasetName the given name of the dataset to be processed
* @param country the given country from which the records refer to
* @param language the given language that the records contain
* @param stepsize the stepsize
* @param dataset the given dataset itself to be processed as a zip file
* @param dataset the given dataset itself to be processed as a compressed file
* @param xsltFile the xslt file used for transformation to edm external
* @return 202 if it's processed correctly, 4xx or 500 otherwise
*/
Expand All @@ -137,9 +144,10 @@ public DatasetIdDto harvestDatasetFromFile(
@Parameter(description = "country of the dataset", required = true) @RequestParam Country country,
@Parameter(description = "language of the dataset", required = true) @RequestParam Language language,
@Parameter(description = "step size to apply in record selection", schema = @Schema(description = "step size", defaultValue = "1")) @RequestParam(required = false) Integer stepsize,
@Parameter(description = "dataset records uploaded in a zip file", required = true) @RequestParam MultipartFile dataset,
@Parameter(description = "dataset records uploaded in a zip, tar or tar.gz file", required = true) @RequestParam MultipartFile dataset,
@Parameter(description = "xslt file to transform to EDM external") @RequestParam(required = false) MultipartFile xsltFile) {
checkArgument(NAME_PATTERN.matcher(datasetName).matches(), MESSAGE_FOR_DATASET_VALID_NAME);
CompressedFileExtension compressedFileExtension = getCompressedFileExtensionTypeFromUploadedFile(dataset);
if (stepsize != null) {
checkArgument(stepsize > 0, MESSAGE_FOR_STEP_SIZE_VALID_VALUE);
}
Expand All @@ -150,13 +158,13 @@ public DatasetIdDto harvestDatasetFromFile(
DatasetMetadata datasetMetadata = DatasetMetadata.builder().withDatasetId(createdDatasetId)
.withDatasetName(datasetName).withCountry(country).withLanguage(language)
.withStepSize(stepsize).build();
harvestPublishService.runHarvestZipAsync(dataset, datasetMetadata)
harvestPublishService.runHarvestFileAsync(dataset, datasetMetadata, compressedFileExtension)
.exceptionally(e -> datasetLogService.logException(createdDatasetId, e));
return new DatasetIdDto(createdDatasetId);
}

/**
* POST API calls for harvesting and processing the records given a URL of a zip file
* POST API calls for harvesting and processing the records given a URL of a compressed file
*
* @param datasetName the given name of the dataset to be processed
* @param country the given country from which the records refer to
Expand All @@ -182,6 +190,7 @@ public DatasetIdDto harvestDatasetFromURL(
@Parameter(description = "xslt file to transform to EDM external") @RequestParam(required = false) MultipartFile xsltFile) {

checkArgument(NAME_PATTERN.matcher(datasetName).matches(), MESSAGE_FOR_DATASET_VALID_NAME);
CompressedFileExtension compressedFileExtension = getCompressedFileExtensionTypeFromUrl(url);
if (stepsize != null) {
checkArgument(stepsize > 0, MESSAGE_FOR_STEP_SIZE_VALID_VALUE);
}
Expand All @@ -194,7 +203,7 @@ public DatasetIdDto harvestDatasetFromURL(
DatasetMetadata datasetMetadata = DatasetMetadata.builder().withDatasetId(createdDatasetId)
.withDatasetName(datasetName).withCountry(country).withLanguage(language)
.withStepSize(stepsize).build();
harvestPublishService.runHarvestHttpZipAsync(url, datasetMetadata)
harvestPublishService.runHarvestHttpFileAsync(url, datasetMetadata, compressedFileExtension)
.exceptionally(e -> datasetLogService.logException(createdDatasetId, e));
return new DatasetIdDto(createdDatasetId);
}
Expand Down Expand Up @@ -307,7 +316,7 @@ public String getRecord(@PathVariable("id") String datasetId, @RequestParam Stri
private Set<Step> getSetFromStep(String step) {
Set<Step> steps;
if (step == null || step.isBlank() || step.equals("HARVEST")) {
steps = Set.of(Step.HARVEST_ZIP, Step.HARVEST_OAI_PMH);
steps = Set.of(Step.HARVEST_FILE, Step.HARVEST_OAI_PMH);
} else {
try {
steps = Set.of(Step.valueOf(step));
Expand Down Expand Up @@ -372,39 +381,87 @@ private InputStream createXsltAsInputStreamIfPresent(MultipartFile xslt) {
return new ByteArrayInputStream(new byte[0]);
}

private static class CountryView {

@JsonProperty("name")
private final String name;
@JsonProperty("xmlValue")
private final String xmlValue;

/**
* Instantiates a new Country view.
*
* @param country the country
*/
CountryView(Country country) {
this.name = country.name();
this.xmlValue = country.xmlValue();
private CompressedFileExtension getCompressedFileExtensionTypeFromUrl(String url) {

try {
if (url.startsWith("file:/")) {
Path path = Path.of(url);
String fileContentType = Files.probeContentType(path);

return getCompressedFileExtensionType(fileContentType);
} else {

URLConnection urlConnection = new URL(url).openConnection();
String fileContentType = urlConnection.getContentType();

if (StringUtils.isEmpty(fileContentType)) {
throw new InvalidCompressedFileException(new Exception("There was an issue inspecting file's content type"));
}

return getCompressedFileExtensionType(fileContentType);


}
} catch (IOException e) {
throw new InvalidCompressedFileException(e);

}
}

private static class LanguageView {

@JsonProperty("name")
private final String name;
@JsonProperty("xmlValue")
private final String xmlValue;

/**
* Instantiates a new Language view.
*
* @param language the language
*/
LanguageView(Language language) {
this.name = language.name();
this.xmlValue = language.xmlValue();
private CompressedFileExtension getCompressedFileExtensionTypeFromUploadedFile(MultipartFile uploadedFile){
String fileContentType = uploadedFile.getContentType();
if (StringUtils.isEmpty(fileContentType)) {
throw new InvalidCompressedFileException(new Exception("There was an issue inspecting file's content type"));
}

return getCompressedFileExtensionType(fileContentType);
}

private CompressedFileExtension getCompressedFileExtensionType(String fileContentType){
if (fileContentType.contains("gzip")) {
return CompressedFileExtension.TAR_GZ;
} else if (fileContentType.contains("zip")) {
return CompressedFileExtension.ZIP;
} else if (fileContentType.contains("x-tar")) {
return CompressedFileExtension.TAR;
} else {
throw new InvalidCompressedFileException(new Exception("The compressed file type is invalid"));
}
}

private static class CountryView {

@JsonProperty("name")
private final String name;
@JsonProperty("xmlValue")
private final String xmlValue;

/**
* Instantiates a new Country view.
*
* @param country the country
*/
CountryView(Country country) {
this.name = country.name();
this.xmlValue = country.xmlValue();
}
}

private static class LanguageView {

@JsonProperty("name")
private final String name;
@JsonProperty("xmlValue")
private final String xmlValue;

/**
* Instantiates a new Language view.
*
* @param language the language
*/
LanguageView(Language language) {
this.name = language.name();
this.xmlValue = language.xmlValue();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import static java.lang.String.format;

import eu.europeana.metis.sandbox.common.exception.InvalidDatasetException;
import eu.europeana.metis.sandbox.common.exception.InvalidZipFileException;
import eu.europeana.metis.sandbox.common.exception.InvalidCompressedFileException;
import eu.europeana.metis.sandbox.common.exception.NoRecordFoundException;
import eu.europeana.metis.sandbox.common.exception.RecordParsingException;
import eu.europeana.metis.sandbox.common.exception.ServiceException;
Expand Down Expand Up @@ -48,8 +48,8 @@ public ResponseEntity<Object> handleIllegalArgumentException(IllegalArgumentExce
return new ResponseEntity<>(exceptionModel, exceptionModel.getStatus());
}

@ExceptionHandler(InvalidZipFileException.class)
public ResponseEntity<Object> handleIInvalidZipFileException(InvalidZipFileException ex) {
@ExceptionHandler(InvalidCompressedFileException.class)
public ResponseEntity<Object> handleInvalidCompressedFileException(InvalidCompressedFileException ex) {
var exceptionModel = new ExceptionModelDto(HttpStatus.BAD_REQUEST.value(),
HttpStatus.BAD_REQUEST, ex.getMessage());
LOGGER.error(ex.getMessage(), ex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,43 @@
import eu.europeana.metis.sandbox.common.OaiHarvestData;
import eu.europeana.metis.sandbox.common.exception.ServiceException;
import eu.europeana.metis.sandbox.domain.DatasetMetadata;
import eu.europeana.metis.utils.CompressedFileExtension;
import org.springframework.web.multipart.MultipartFile;

import java.util.concurrent.CompletableFuture;

public interface HarvestPublishService {

/**
* Start the harvest of a zip asynchronously on the given file {@link MultipartFile}
* Start the harvest of a compressed file asynchronously on the given file {@link MultipartFile}
*
* @param file zip file containing one or more records
* @param datasetMetadata object that encapsulates all data related to the dataset
* @param file compressed file containing one or more records
* @param datasetMetadata object that encapsulates all data related to the dataset
* @param compressedFileExtension the content type of file being uploaded
* @return A HarvestContent object containing the content of the harvest and a boolean indicating
* if it reached the max number of records
* @throws ServiceException if file is not valid, error reading file, if records are empty
*/
CompletableFuture<Void> runHarvestZipAsync(MultipartFile file, DatasetMetadata datasetMetadata);
CompletableFuture<Void> runHarvestFileAsync(MultipartFile file, DatasetMetadata datasetMetadata, CompressedFileExtension compressedFileExtension);

/**
* Start the harvest of an url asynchronously on the given URL {@link String}
*
* @param url URL for zip file containing one or more records
* @param datasetMetadata the object that encapsulates all data related to the dataset
* @param url URL for compressed file containing one or more records
* @param datasetMetadata the object that encapsulates all data related to the dataset
* @param compressedFileExtension the content type of the file being uploaded
* @return A HarvestContent object containing the content of the harvest and a boolean indicating
* if it reached the max number of records
* @throws ServiceException if error processing URL, if URL timeout, if records are empty
*/
CompletableFuture<Void> runHarvestHttpZipAsync(String url, DatasetMetadata datasetMetadata);
CompletableFuture<Void> runHarvestHttpFileAsync(String url, DatasetMetadata datasetMetadata, CompressedFileExtension compressedFileExtension);

/**
* Async publish to message broker for further processing. This will send messages to 'harvestOai`
* queue
*
* @param datasetMetadata the object that encapsulates all data related to the dataset
* @param oaiHarvestData An object that encapsulates the data necessary for OAI-PMH harvesting
* @param oaiHarvestData An object that encapsulates the data necessary for OAI-PMH harvesting
* @return {@link CompletableFuture} of the process
*/
CompletableFuture<Void> runHarvestOaiPmhAsync(DatasetMetadata datasetMetadata, OaiHarvestData oaiHarvestData);
Expand Down
Loading

0 comments on commit a2983b0

Please sign in to comment.