Skip to content

Commit

Permalink
Closes #2510: Endpoint to uningest multiple data files
Browse files Browse the repository at this point in the history
  • Loading branch information
diasf committed Sep 3, 2024
1 parent d49edbe commit 48ecf07
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import edu.harvard.iq.dataverse.api.dto.PrivateUrlDTO;
import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO;
import edu.harvard.iq.dataverse.api.dto.SubmitForReviewDataDTO;
import edu.harvard.iq.dataverse.api.dto.UningestRequestDTO;
import edu.harvard.iq.dataverse.api.dto.UningestableItemDTO;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode;
import edu.harvard.iq.dataverse.common.BundleUtil;
Expand Down Expand Up @@ -70,6 +72,7 @@
import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand;
import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand;
import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UningestFileCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
Expand All @@ -78,6 +81,8 @@
import edu.harvard.iq.dataverse.export.ExportService;
import edu.harvard.iq.dataverse.export.ExporterType;
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
import edu.harvard.iq.dataverse.ingest.UningestInfoService;
import edu.harvard.iq.dataverse.ingest.UningestService;
import edu.harvard.iq.dataverse.notification.NotificationObjectType;
import edu.harvard.iq.dataverse.notification.NotificationParameter;
import edu.harvard.iq.dataverse.notification.UserNotificationService;
Expand Down Expand Up @@ -190,6 +195,7 @@ public class Datasets extends AbstractApiBean {
private PermissionServiceBean permissionSvc;
private FileLabelsService fileLabelsService;
private DatasetFileDownloadUrlCsvWriter fileDownloadUrlCsvWriter;
private UningestInfoService uningestInfoService;

// -------------------- CONSTRUCTORS --------------------

Expand All @@ -208,7 +214,8 @@ public Datasets(DatasetDao datasetDao, DataverseDao dataverseDao,
FileDownloadAPIHandler fileDownloadAPIHandler, DataverseRoleServiceBean rolesSvc,
RoleAssigneeServiceBean roleAssigneeSvc, PermissionServiceBean permissionSvc,
FileLabelsService fileLabelsService,
DatasetFileDownloadUrlCsvWriter fileDownloadUrlCsvWriter) {
DatasetFileDownloadUrlCsvWriter fileDownloadUrlCsvWriter,
UningestInfoService uningestInfoService) {
this.datasetDao = datasetDao;
this.dataverseDao = dataverseDao;
this.userNotificationService = userNotificationService;
Expand All @@ -232,6 +239,7 @@ public Datasets(DatasetDao datasetDao, DataverseDao dataverseDao,
this.permissionSvc = permissionSvc;
this.fileLabelsService = fileLabelsService;
this.fileDownloadUrlCsvWriter = fileDownloadUrlCsvWriter;
this.uningestInfoService = uningestInfoService;
}

// -------------------- LOGIC --------------------
Expand Down Expand Up @@ -473,6 +481,43 @@ public Response getVersion(@PathParam("id") String datasetId, @PathParam("versio
}));
}

/**
 * Lists the files of the given dataset that are eligible for uningest.
 */
@GET
@Path("{id}/uningest")
public Response listUningestableFiles(@PathParam("id") String datasetId) {
    return allowCors(response(req -> {
        List<UningestableItemDTO> items = uningestInfoService
                .listUningestableFiles(findDatasetOrDie(datasetId))
                .stream()
                .map(UningestableItemDTO::fromDatafile)
                .collect(Collectors.toList());
        return ok(items);
    }));
}

/**
 * Uningests the dataset files whose ids are listed in the request body.
 * An empty (or absent) id list means "uningest every uningestable file".
 * Files that fail to uningest are reported by id; processing continues
 * with the remaining files.
 */
@POST
@ApiWriteOperation
@Path("{id}/uningest")
public Response uningestFiles(@PathParam("id") String datasetId, JsonObject json) {
    return allowCors(response(req -> {
        UningestRequestDTO uningestRequest = jsonParser().parseUningestRequest(json);

        // Ids not present in the uningestable list are silently ignored.
        List<DataFile> filesToProcess = uningestInfoService
                .listUningestableFiles(findDatasetOrDie(datasetId)).stream()
                .filter(file -> uningestRequest.getDataFileIds().isEmpty()
                        || uningestRequest.getDataFileIds().contains(file.getId()))
                .collect(Collectors.toList());

        List<String> failedIds = new ArrayList<>();
        for (DataFile file : filesToProcess) {
            try {
                execCommand(new UningestFileCommand(req, file));
            } catch (Exception e) {
                // Best-effort: record the failure and keep going.
                failedIds.add(file.getId().toString());
            }
        }

        if (failedIds.isEmpty()) {
            return ok("Uningest performed on " + filesToProcess.size() + " files.");
        }
        return ok("Uningest failed on " + failedIds.size() + " of " + filesToProcess.size() +
                " files. Failed ids: " + String.join(", ", failedIds));
    }));
}

@GET
@Path("{id}/versions/{versionId}/files")
public Response listVersionFiles(@PathParam("id") String datasetId, @PathParam("versionId") String versionId) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package edu.harvard.iq.dataverse.api.dto;

import java.util.HashSet;
import java.util.Set;

/**
 * API request payload for the dataset uningest endpoint: the ids of the data
 * files that should be uningested. An empty set is interpreted by the endpoint
 * as "all uningestable files".
 */
public class UningestRequestDTO {

    private final Set<Long> dataFileIds = new HashSet<>();

    // -------------------- GETTERS --------------------

    public Set<Long> getDataFileIds() {
        return dataFileIds;
    }

    // -------------------- LOGIC --------------------

    /** Registers another data file id; returns this instance to allow chaining. */
    public UningestRequestDTO addDataFileId(Long id) {
        dataFileIds.add(id);
        return this;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package edu.harvard.iq.dataverse.api.dto;

import edu.harvard.iq.dataverse.persistence.datafile.DataFile;
import edu.harvard.iq.dataverse.util.FileUtil;
import org.apache.commons.lang.StringUtils;

import java.io.Serializable;

/**
 * View model describing a single data file eligible for uningest, as returned
 * by the dataset uningest listing endpoint.
 */
public class UningestableItemDTO implements Serializable {
    private static final long serialVersionUID = 1L;

    private Long dataFileId;
    private String fileName;
    private String originalFormat;
    private String md5;
    private String unf;

    // -------------------- GETTERS --------------------

    public Long getDataFileId() {
        return dataFileId;
    }

    public String getFileName() {
        return fileName;
    }

    public String getOriginalFormat() {
        return originalFormat;
    }

    public String getMd5() {
        return md5;
    }

    public String getUnf() {
        return unf;
    }

    // -------------------- LOGIC --------------------

    /**
     * Builds a DTO from the given {@link DataFile}. The md5 field is filled only
     * when the file's checksum type is MD5; otherwise it is left empty.
     */
    public static UningestableItemDTO fromDatafile(DataFile file) {
        UningestableItemDTO item = new UningestableItemDTO();
        item.dataFileId = file.getId();
        item.fileName = file.getFileMetadata().getLabel();
        item.originalFormat = extractAndFormatExtension(file);
        item.md5 = file.getChecksumType() == DataFile.ChecksumType.MD5
                ? file.getChecksumValue() : StringUtils.EMPTY;
        item.unf = file.getUnf();
        return item;
    }

    // -------------------- PRIVATE --------------------

    /**
     * Extracts the file extension (for tabular files: the extension of the
     * original, pre-ingest format) and returns it upper-cased without the
     * leading dot. Was declared public despite sitting in the PRIVATE section
     * and being used only internally; narrowed to private to match the
     * declared intent.
     */
    private static String extractAndFormatExtension(DataFile file) {
        String extension = FileUtil.generateOriginalExtension(file.isTabularData()
                ? file.getDataTable().getOriginalFileFormat()
                : file.getContentType());
        // NOTE(review): toUpperCase() is locale-sensitive (e.g. Turkish dotless i);
        // consider toUpperCase(Locale.ROOT) — confirm against project conventions.
        return extension.replaceFirst("\\.", StringUtils.EMPTY).toUpperCase();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.MetadataBlockDao;
import edu.harvard.iq.dataverse.api.dto.UningestRequestDTO;
import edu.harvard.iq.dataverse.common.Util;
import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams;
import edu.harvard.iq.dataverse.persistence.datafile.DataFile;
Expand All @@ -22,8 +23,10 @@
import edu.harvard.iq.dataverse.persistence.workflow.Workflow;
import edu.harvard.iq.dataverse.persistence.workflow.WorkflowStepData;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import io.vavr.control.Option;

import javax.json.JsonArray;
import javax.json.JsonNumber;
import javax.json.JsonObject;
import javax.json.JsonString;
import javax.json.JsonValue;
Expand All @@ -37,6 +40,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.logging.Logger;

Expand Down Expand Up @@ -426,6 +430,17 @@ public DataFile parseDataFile(JsonObject datafileJson) {
return dataFile;
}

/**
 * Reads an uningest request from json. Recognized property: "dataFileIds" —
 * an optional array of numeric data file ids. A missing array yields a
 * request with an empty id set.
 */
public UningestRequestDTO parseUningestRequest(JsonObject json) {
    UningestRequestDTO request = new UningestRequestDTO();

    JsonArray fileIds = json.getJsonArray("dataFileIds");
    if (fileIds != null) {
        for (JsonNumber fileId : fileIds.getValuesAs(JsonNumber.class)) {
            request.addDataFileId(fileId.longValue());
        }
    }

    return request;
}

public DatasetField parseFieldForDelete(JsonObject json) throws JsonParseException {
DatasetField ret = new DatasetField();
DatasetFieldType type = datasetFieldSvc.findByNameOpt(json.getString("typeName", ""));
Expand Down

0 comments on commit 48ecf07

Please sign in to comment.