Skip to content

Commit

Permalink
Closes #2440: Improved shapefile handler, error handling and simpler …
Browse files Browse the repository at this point in the history
…api, use commons-compress for extraction supporting unicode extra fields in zip
  • Loading branch information
diasf committed Mar 27, 2024
1 parent cd80fb5 commit 6c90c60
Show file tree
Hide file tree
Showing 10 changed files with 341 additions and 637 deletions.
3 changes: 0 additions & 3 deletions dataverse-webapp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>com.maxmind.geoip2</groupId>
Expand All @@ -294,7 +293,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>net.sf.ehcache</groupId>
Expand All @@ -319,7 +317,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.18</version>
</dependency>
<dependency>
<groupId>org.duracloud</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.ShapefileHandler;
import io.vavr.Tuple2;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -246,7 +247,7 @@ private DataFile unpackFitsGzippedAndCreateDataFile(Path tempFile, String fileNa
private List<DataFile> unpackZipAndCreateDataFiles(Path tempFile, Long fileSizeLimit, Long zipFileUnpackFilesLimit) throws IOException {
List<DataFile> datafiles = new ArrayList<>();

try (ZipInputStream unZippedIn = new ZipInputStream(Files.newInputStream(tempFile))) {
try (ZipArchiveInputStream unZippedIn = new ZipArchiveInputStream(Files.newInputStream(tempFile))) {

ZipEntry zipEntry = unZippedIn.getNextEntry();
while (zipEntry != null) {
Expand Down Expand Up @@ -314,23 +315,22 @@ private Tuple2<String, String> extractDirectoryAndFileName(ZipEntry zipEntry) {
* one zip archive per each complete set of shape files.
*/
private List<DataFile> createDataFilesFromReshapedShapeFile(Path tempFile, Long fileSizeLimit) throws IOException {
try (IngestServiceShapefileHelper shpHelper = new IngestServiceShapefileHelper(tempFile.toFile(), Paths.get(getFilesTempDirectory()).toFile())) {
List<DataFile> datafiles = new ArrayList<>();

IngestServiceShapefileHelper shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), Paths.get(getFilesTempDirectory()).toFile());
for (File finalFile : shpHelper.processFile()) {
String finalType = fileTypeDetector.determineFileType(finalFile, finalFile.getName());

List<DataFile> datafiles = new ArrayList<>();

for (File finalFile : shpIngestHelper.processFile()) {
String finalType = fileTypeDetector.determineFileType(finalFile, finalFile.getName());

try (FileInputStream finalFileInputStream = new FileInputStream(finalFile)) {
Path unZippedShapeTempFile = FileUtil.limitedInputStreamToTempFile(finalFileInputStream, fileSizeLimit);
DataFile newDatafile = createSingleDataFile(unZippedShapeTempFile, finalFile.getName(), finalType, 0L);
datafiles.add(newDatafile);
try (FileInputStream finalFileInputStream = new FileInputStream(finalFile)) {
Path unZippedShapeTempFile = FileUtil.limitedInputStreamToTempFile(finalFileInputStream, fileSizeLimit);
DataFile newDatafile = createSingleDataFile(unZippedShapeTempFile, finalFile.getName(), finalType, 0L);
datafiles.add(newDatafile);

}
}
}

return datafiles;
return datafiles;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,13 @@ public String determineFileType(File f, String fileName) throws IOException {

// Is this a zipped Shapefile?
// Check for shapefile extensions as described here: http://en.wikipedia.org/wiki/Shapefile

ShapefileHandler shapefileHandler = new ShapefileHandler(f);
if (shapefileHandler.containsShapefile()) {
fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE;
try {
ShapefileHandler shapefileHandler = new ShapefileHandler(f);
if (shapefileHandler.containsShapefile()) {
fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE;
}
} catch (Exception ex) {
logger.warn("Shapefile inspection failed for file {}", fileName, ex);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@

import com.google.common.base.Preconditions;
import edu.harvard.iq.dataverse.util.ShapefileHandler;
import io.vavr.control.Try;
import org.apache.commons.io.FileUtils;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;


Expand All @@ -21,87 +25,64 @@
*
* @author raprasad
*/
public class IngestServiceShapefileHelper {
public class IngestServiceShapefileHelper implements Closeable {

private static final Logger logger = Logger.getLogger(IngestServiceShapefileHelper.class.getCanonicalName());

private File zippedShapefile;
private File rezipFolderBase;
private final File zippedShapefile;
private final File reZipFolder;
private final File unZipFolder;

/**
 * Checks that the given reference is non-null and denotes a regular file.
 * A warning is logged whenever the check fails.
 *
 * @param fileObject candidate file; may be {@code null}
 * @return {@code true} only when {@code fileObject} is non-null and
 *         {@link File#isFile()} returns {@code true}
 */
private boolean isValidFile(File fileObject) {
    if (fileObject == null) {
        logger.warning("fileObject was null");
        return false;
    }
    boolean regularFile = fileObject.isFile();
    if (!regularFile) {
        logger.warning("fileObject was not a file. Failed \"isFile()\": " + fileObject.getAbsolutePath());
    }
    return regularFile;
}
// -------------------- CONSTRUCTOR --------------------


/**
 * Checks that the given reference is non-null and denotes a directory.
 * A warning is logged whenever the check fails.
 *
 * @param fileObject candidate directory; may be {@code null}
 * @return {@code true} only when {@code fileObject} is non-null and
 *         {@link File#isDirectory()} returns {@code true}
 */
private boolean isValidFolder(File fileObject) {
    if (fileObject == null) {
        logger.warning("fileObject was null");
        return false;
    }
    if (!fileObject.isDirectory()) {
        // The failing check is isDirectory(); the message previously named isFile() by mistake.
        logger.warning("fileObject was not a directory. Failed \"isDirectory()\": " + fileObject.getAbsolutePath());
        return false;
    }
    return true;
}

/*
Constructor that accepts a file object
*/
public IngestServiceShapefileHelper(File zippedShapefile, File rezipFolderBase) {
/**
* Constructor that accepts a file object
*/
public IngestServiceShapefileHelper(File zippedShapefile, File workingFolderBase) {
Preconditions.checkArgument(isValidFile(zippedShapefile));
Preconditions.checkArgument(isValidFolder(rezipFolderBase));
Preconditions.checkArgument(isValidFolder(workingFolderBase));

this.zippedShapefile = zippedShapefile;
this.rezipFolderBase = rezipFolderBase;
String id = new SimpleDateFormat("yyyy-MM-dd-hh-mm-ss-SSS").format(new Date());
this.reZipFolder = getShapefileUnzipTempDirectory(workingFolderBase, "shp_" + id + "_rezip");
this.unZipFolder = getShapefileUnzipTempDirectory(workingFolderBase, "shp_" + id + "_unzip");

}

public List<File> processFile() {
// -------------------- LOGIC --------------------

// (1) Use the ShapefileHandler to check the .zip for a shapefile
//
ShapefileHandler shpHandler = new ShapefileHandler(zippedShapefile);
if (!shpHandler.containsShapefile()) {
logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
throw new IllegalStateException("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
}

// (2) Rezip the shapefile pieces
File rezipFolder = getShapefileUnzipTempDirectory(rezipFolderBase);
logger.info("rezipFolder: " + rezipFolderBase.getAbsolutePath());
boolean rezipSuccess;
public List<File> processFile() {
try {
rezipSuccess = shpHandler.rezipShapefileSets(rezipFolder);
} catch (IOException ex) {
logger.severe("Shapefile was not correctly unpacked/repacked");
logger.severe("shpHandler message: " + shpHandler.errorMessage);
throw new IllegalStateException("Shapefile was not correctly unpacked/repacked: " + shpHandler.errorMessage, ex);
}
// (1) Use the ShapefileHandler to check the .zip for a shapefile
//
ShapefileHandler shpHandler = new ShapefileHandler(zippedShapefile);
if (!shpHandler.containsShapefile()) {
logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
throw new IllegalStateException("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
}

if (!rezipSuccess) {
throw new IllegalStateException("Shapefile was not correctly unpacked/repacked: " + shpHandler.errorMessage);
// (2) Rezip the shapefile pieces
return shpHandler.reZipShapefileSets(unZipFolder, reZipFolder);
} catch (Exception ex) {
throw new IllegalStateException("Shapefile was not correctly unpacked/repacked", ex);
}

return shpHandler.getFinalRezippedFiles();
}

/**
 * Deletes the working directories of this helper: first the unzip folder,
 * then the rezip folder. Each deletion is delegated to
 * {@code deleteDirectory(File)}.
 *
 * @throws IOException declared by the {@link Closeable} contract
 */
@Override
public void close() throws IOException {
    for (File workingDirectory : new File[] {unZipFolder, reZipFolder}) {
        deleteDirectory(workingDirectory);
    }
}

private static File getShapefileUnzipTempDirectory(File tempDirectoryBase) {
// -------------------- PRIVATE --------------------

String datestampedFileName = "shp_" + new SimpleDateFormat("yyyy-MM-dd-hh-mm-ss-SSS").format(new Date());
/**
 * Attempts to delete the given directory via {@code FileUtils.deleteDirectory}.
 * A failure captured by {@code Try} is logged at SEVERE level instead of
 * being rethrown to the caller.
 *
 * @param directory directory to remove
 */
private void deleteDirectory(File directory) {
    Try<Void> deletion = Try.run(() -> FileUtils.deleteDirectory(directory));
    deletion.onFailure(ex ->
            logger.log(Level.SEVERE, "Error cleaning shapefile working directory:" + directory, ex));
}

File datestampedFolder = new File(tempDirectoryBase, datestampedFileName);
private static File getShapefileUnzipTempDirectory(File tempDirectoryBase, String directoryName) {
File datestampedFolder = new File(tempDirectoryBase, directoryName);
if (!datestampedFolder.isDirectory()) {
/* Note that "createDirectories()" must be used - not
* "createDirectory()", to make sure all the parent
Expand All @@ -115,4 +96,31 @@ private static File getShapefileUnzipTempDirectory(File tempDirectoryBase) {
}
return datestampedFolder;
}

/**
 * Checks that the given reference is non-null and denotes a regular file.
 * A warning is logged whenever the check fails.
 *
 * @param fileObject candidate file; may be {@code null}
 * @return {@code true} only when {@code fileObject} is non-null and
 *         {@link File#isFile()} returns {@code true}
 */
private boolean isValidFile(File fileObject) {
    if (fileObject == null) {
        logger.warning("fileObject was null");
        return false;
    }
    boolean regularFile = fileObject.isFile();
    if (!regularFile) {
        logger.warning("fileObject was not a file. Failed \"isFile()\": " + fileObject.getAbsolutePath());
    }
    return regularFile;
}


/**
 * Checks that the given reference is non-null and denotes a directory.
 * A warning is logged whenever the check fails.
 *
 * @param fileObject candidate directory; may be {@code null}
 * @return {@code true} only when {@code fileObject} is non-null and
 *         {@link File#isDirectory()} returns {@code true}
 */
private boolean isValidFolder(File fileObject) {
    if (fileObject == null) {
        logger.warning("fileObject was null");
        return false;
    }
    if (!fileObject.isDirectory()) {
        // The failing check is isDirectory(); the message previously named isFile() by mistake.
        logger.warning("fileObject was not a directory. Failed \"isDirectory()\": " + fileObject.getAbsolutePath());
        return false;
    }
    return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.primefaces.model.file.UploadedFile;

import javax.annotation.PreDestroy;
import javax.ejb.EJBException;
import javax.faces.application.FacesMessage;
import javax.faces.context.FacesContext;
import javax.faces.event.ActionEvent;
Expand Down Expand Up @@ -882,7 +883,7 @@ public void handleFileUpload(FileUploadEvent event) throws IOException {
// zip file.
dFileList = dataFileCreator.createDataFiles(inputStream, uploadedFile.getFileName(), uploadedFile.getContentType());
dataFileUploadInfo.addSizeAndDataFiles(fileSize, dFileList);
} catch (IOException | FileExceedsMaxSizeException ex) {
} catch (EJBException | IOException | FileExceedsMaxSizeException ex) {
logger.warning("Failed to process and/or save the file " + uploadedFile.getFileName() + "; " + ex.getMessage());
return;
} catch (VirusFoundException e) {
Expand Down
Loading

0 comments on commit 6c90c60

Please sign in to comment.