From 493ad2e690d8702e7a65750fcdf2b0a6d6dfe53b Mon Sep 17 00:00:00 2001 From: Julia Damerow Date: Tue, 11 Oct 2022 14:19:06 -0400 Subject: [PATCH 1/2] Fix pom.xml --- giles-eco-imogen/pom.xml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/giles-eco-imogen/pom.xml b/giles-eco-imogen/pom.xml index fb29984..287097c 100644 --- a/giles-eco-imogen/pom.xml +++ b/giles-eco-imogen/pom.xml @@ -343,17 +343,18 @@ 1.8 + + org.apache.tomcat.maven + tomcat7-maven-plugin + 2.2 + + ${tomcat.deploy.path} + ${tomcat.server.id} + /${tomcat.app.name} + + - - org.apache.tomcat.maven - tomcat7-maven-plugin - 2.2 - - ${tomcat.deploy.path} - ${tomcat.server.id} - /${tomcat.app.name} - - + From d91a6a272d001a10aded754840027f11e4525c21 Mon Sep 17 00:00:00 2001 From: Julia Damerow Date: Fri, 14 Oct 2022 16:07:28 -0400 Subject: [PATCH 2/2] [GECO-129] fixes for certain pdf files --- giles-eco-imogen/pom.xml | 4 ++-- .../imogen/core/service/impl/ImageExtractionManager.java | 6 +++--- giles-eco-imogen/src/main/resources/log4j.xml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/giles-eco-imogen/pom.xml b/giles-eco-imogen/pom.xml index 287097c..aef4959 100644 --- a/giles-eco-imogen/pom.xml +++ b/giles-eco-imogen/pom.xml @@ -200,12 +200,12 @@ org.apache.pdfbox pdfbox - 2.0.23 + 3.0.0-RC1 org.apache.pdfbox pdfbox-tools - 2.0.23 + 3.0.0-RC1 com.github.jai-imageio diff --git a/giles-eco-imogen/src/main/java/edu/asu/diging/gilesecosystem/imogen/core/service/impl/ImageExtractionManager.java b/giles-eco-imogen/src/main/java/edu/asu/diging/gilesecosystem/imogen/core/service/impl/ImageExtractionManager.java index 0e8d52b..ba922c5 100644 --- a/giles-eco-imogen/src/main/java/edu/asu/diging/gilesecosystem/imogen/core/service/impl/ImageExtractionManager.java +++ b/giles-eco-imogen/src/main/java/edu/asu/diging/gilesecosystem/imogen/core/service/impl/ImageExtractionManager.java @@ -9,6 +9,7 @@ import javax.annotation.PostConstruct; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; @@ -67,7 +68,7 @@ public void init() { * slow performance in color operations. Solution: disable LittleCMS in favour * of the old KCMS (Kodak Color Management System)" */ - System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider"); + //System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider"); requestFactory.config(CompletionNotificationRequest.class); } @@ -86,8 +87,7 @@ public void extractImages(ICompletedStorageRequest request) { PDDocument pdfDocument = null; RequestStatus status = RequestStatus.COMPLETE; try { - pdfDocument = PDDocument.load(new ByteArrayInputStream(downloadFile(request.getDownloadUrl())), - MemoryUsageSetting.setupTempFileOnly()); + pdfDocument = Loader.loadPDF(new ByteArrayInputStream(downloadFile(request.getDownloadUrl()))); } catch (IOException e) { messageHandler.handleMessage("Could not extract text.", e, MessageType.ERROR); status = RequestStatus.FAILED; diff --git a/giles-eco-imogen/src/main/resources/log4j.xml b/giles-eco-imogen/src/main/resources/log4j.xml index b36590d..78fcf45 100644 --- a/giles-eco-imogen/src/main/resources/log4j.xml +++ b/giles-eco-imogen/src/main/resources/log4j.xml @@ -11,7 +11,7 @@ - +