Skip to content

Commit

Permalink
Merge pull request #2 from diging/develop
Browse files Browse the repository at this point in the history
Prepare release v0.1
  • Loading branch information
jdamerow authored Dec 7, 2016
2 parents 6b6aa3d + 20cc6d6 commit d4c455b
Show file tree
Hide file tree
Showing 20 changed files with 712 additions and 266 deletions.
5 changes: 5 additions & 0 deletions cepheus/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
<cepheus.base.url>http://localhost:8082/cepheus</cepheus.base.url>
<!-- Kafka -->
<cepheus.kafka.hosts>localhost:9092</cepheus.kafka.hosts>

<!-- pdf -->
<pdf.conversion.dpi>600</pdf.conversion.dpi>
<pdf.conversion.type>RGB</pdf.conversion.type>
<pdf.conversion.format>tiff</pdf.conversion.format>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;

import edu.asu.diging.gilesecosystem.cepheus.kafka.TextExtractionRequestReceiver;
import edu.asu.diging.gilesecosystem.cepheus.kafka.ExtractionRequestReceiver;
import edu.asu.diging.gilesecosystem.cepheus.service.IPropertiesManager;
import edu.asu.diging.gilesecosystem.requests.kafka.KafkaConfig;

Expand All @@ -36,6 +36,7 @@ public Map<String, Object> consumerConfigs() {
IntegerDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
StringDeserializer.class);
props.put(ConsumerConfig.CLIENT_ID_CONFIG, "geco.consumer.cepheus.1");
// consumer groups allow a pool of processes to divide the work of
// consuming and processing records
props.put(ConsumerConfig.GROUP_ID_CONFIG, "gileseco.pdf.processing");
Expand All @@ -57,8 +58,8 @@ public ConcurrentKafkaListenerContainerFactory kafkaListenerContainerFactory() {
}

@Bean
public TextExtractionRequestReceiver receiver() {
return new TextExtractionRequestReceiver();
public ExtractionRequestReceiver receiver() {
return new ExtractionRequestReceiver();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
package edu.asu.diging.gilesecosystem.cepheus.exceptions;

public class CepheusTextExtractionException extends Exception {
public class CepheusExtractionException extends Exception {

/**
*
*/
private static final long serialVersionUID = 4972964567772861597L;

public CepheusTextExtractionException() {
public CepheusExtractionException() {
super();
// TODO Auto-generated constructor stub
}

public CepheusTextExtractionException(String message, Throwable cause,
public CepheusExtractionException(String message, Throwable cause,
boolean enableSuppression, boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
// TODO Auto-generated constructor stub
}

public CepheusTextExtractionException(String message, Throwable cause) {
public CepheusExtractionException(String message, Throwable cause) {
super(message, cause);
// TODO Auto-generated constructor stub
}

public CepheusTextExtractionException(String message) {
public CepheusExtractionException(String message) {
super(message);
// TODO Auto-generated constructor stub
}

public CepheusTextExtractionException(Throwable cause) {
public CepheusExtractionException(Throwable cause) {
super(cause);
// TODO Auto-generated constructor stub
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package edu.asu.diging.gilesecosystem.cepheus.kafka;

import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.PropertySource;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.support.KafkaHeaders;
import org.springframework.messaging.handler.annotation.Header;

import com.fasterxml.jackson.databind.ObjectMapper;

import edu.asu.diging.gilesecosystem.cepheus.exceptions.CepheusExtractionException;
import edu.asu.diging.gilesecosystem.cepheus.service.IPropertiesManager;
import edu.asu.diging.gilesecosystem.cepheus.service.pdf.IImageExtractionManager;
import edu.asu.diging.gilesecosystem.cepheus.service.pdf.ITextExtractionManager;
import edu.asu.diging.gilesecosystem.requests.IImageExtractionRequest;
import edu.asu.diging.gilesecosystem.requests.ITextExtractionRequest;
import edu.asu.diging.gilesecosystem.requests.impl.ImageExtractionRequest;
import edu.asu.diging.gilesecosystem.requests.impl.TextExtractionRequest;

/**
 * Kafka listener that receives extraction requests for Cepheus and hands them
 * to the matching extraction manager.
 *
 * <p>A single listener subscribes to both the text extraction and the image
 * extraction request topics; the topic a message arrived on decides which
 * manager processes it.
 */
@PropertySource("classpath:/config.properties")
public class ExtractionRequestReceiver {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Shared JSON mapper. It is never reconfigured after construction, which is
     * the condition under which Jackson documents ObjectMapper as thread-safe,
     * so there is no need to build a new one for every message.
     */
    private final ObjectMapper mapper = new ObjectMapper();

    @Autowired
    private ITextExtractionManager textExtractionManager;

    @Autowired
    private IImageExtractionManager imageExtractionManager;

    @Autowired
    protected IPropertiesManager propertiesManager;

    /**
     * Dispatches an incoming Kafka message according to the topic it was
     * received on.
     *
     * @param message JSON payload of the request
     * @param topic   name of the topic the message was received from
     */
    @KafkaListener(id="cepheus.extraction", topics = {"${topic_extract_text_request}", "${topic_extract_images_request}"})
    public void receiveMessage(String message, @Header(KafkaHeaders.RECEIVED_TOPIC) String topic) {
        if (topic.equals(propertiesManager.getProperty(IPropertiesManager.KAFKA_EXTRACTION_TOPIC))) {
            extractText(message);
        } else if (topic.equals(propertiesManager.getProperty(IPropertiesManager.KAFKA_IMAGE_EXTRACTION_TOPIC))) {
            extractImage(message);
        } else {
            // Previously such messages were dropped without a trace; still not
            // processed, but now visible in the log.
            logger.warn("Ignoring message from unexpected topic: {}", topic);
        }
    }

    /**
     * Unmarshals a text extraction request and passes it to the text
     * extraction manager. Failures are logged, not rethrown.
     */
    private void extractText(String message) {
        ITextExtractionRequest request = unmarshal(message, TextExtractionRequest.class);
        if (request == null) {
            // FIXME: handle this case
            return;
        }

        try {
            textExtractionManager.extractText(request);
        } catch (CepheusExtractionException e) {
            // Pass the exception along so the stack trace is not lost.
            logger.error("Could not extract text.", e);
            // FIXME: send to monitoring app
        }
    }

    /**
     * Unmarshals an image extraction request and passes it to the image
     * extraction manager. Failures are logged, not rethrown.
     */
    private void extractImage(String message) {
        IImageExtractionRequest request = unmarshal(message, ImageExtractionRequest.class);
        if (request == null) {
            // FIXME: handle this case
            return;
        }

        try {
            imageExtractionManager.extractImages(request);
        } catch (CepheusExtractionException e) {
            // The message used to be a copy of the text extraction one.
            logger.error("Could not extract images.", e);
            // FIXME: send to monitoring app
        }
    }

    /**
     * Reads a JSON message into an instance of the given request class.
     *
     * @return the parsed request, or {@code null} if the message could not be
     *         read (the error is logged)
     */
    private <T> T unmarshal(String message, Class<T> requestClass) {
        try {
            return mapper.readValue(message, requestClass);
        } catch (IOException e) {
            logger.error("Could not unmarshall request.", e);
            return null;
        }
    }
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ public ResponseEntity<String> getFile(
HttpServletRequest request) {

byte[] content = storageManager.getFileContent(requestId, documentId, null, filename);

if (content == null) {
return new ResponseEntity<>(HttpStatus.NOT_FOUND);
}
storageManager.deleteFile(requestId, documentId, null, filename, true);

String contentType = null;
if (content != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ public interface IPropertiesManager {
// Key of the property holding the Cepheus base URL.
public final static String CEPHEUS_URL = "cepheus_url";
// Keys for PDF-to-image conversion settings.
// NOTE(review): presumably backed by the pdf.conversion.* build properties -- confirm in config.properties.
public final static String PDF_TO_IMAGE_DPI = "pdf_to_image_dpi";
public final static String PDF_TO_IMAGE_TYPE = "pdf_to_image_type";
public final static String PDF_EXTRACT_TEXT = "pdf_extract_text";
public final static String PDF_TO_IMAGE_FORMAT = "pdf_to_image_format";

// Keys for Kafka connection and topic names. The two *_request topic keys use the
// same property names as the placeholders in the extraction listener's topic list.
public final static String KAFKA_HOSTS = "kafka_hosts";
public final static String KAFKA_EXTRACTION_TOPIC = "topic_extract_text_request";
public final static String KAFKA_EXTRACTION_COMPLETE_TOPIC = "topic_extract_text_request_complete";
public final static String KAFKA_IMAGE_EXTRACTION_TOPIC = "topic_extract_images_request";
public final static String KAFKA_IMAGE_EXTRACTION_COMPLETE_TOPIC = "topic_extract_images_request_complete";
// Key of the access token that is sent in the Authorization header when files are downloaded.
public final static String GILES_ACCESS_TOKEN = "giles_access_token";


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package edu.asu.diging.gilesecosystem.cepheus.service.pdf;

import edu.asu.diging.gilesecosystem.cepheus.exceptions.CepheusExtractionException;
import edu.asu.diging.gilesecosystem.requests.IImageExtractionRequest;

/**
 * Contract for components that process image extraction requests.
 */
public interface IImageExtractionManager {

    /**
     * Processes the given image extraction request.
     *
     * @param request the image extraction request to process
     * @throws CepheusExtractionException declared so that implementations can
     *         report a failed extraction to the caller
     */
    void extractImages(IImageExtractionRequest request) throws CepheusExtractionException;

}
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package edu.asu.diging.gilesecosystem.cepheus.service.pdf;

import edu.asu.diging.gilesecosystem.cepheus.exceptions.CepheusTextExtractionException;
import edu.asu.diging.gilesecosystem.cepheus.exceptions.CepheusExtractionException;
import edu.asu.diging.gilesecosystem.requests.ITextExtractionRequest;

public interface ITextExtractionManager {

public abstract void extractText(ITextExtractionRequest request) throws CepheusTextExtractionException;
public abstract void extractText(ITextExtractionRequest request) throws CepheusExtractionException;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package edu.asu.diging.gilesecosystem.cepheus.service.pdf.impl;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.http.converter.ByteArrayHttpMessageConverter;
import org.springframework.web.client.RestTemplate;

import edu.asu.diging.gilesecosystem.cepheus.service.IPropertiesManager;
import edu.asu.diging.gilesecosystem.util.files.IFileStorageManager;

/**
 * Base class with helpers shared by the extraction managers: downloading a
 * file over HTTP, storing extracted text on disk, and building the REST
 * endpoint URL of this service.
 */
public class AExtractionManager {

    protected final Logger logger = LoggerFactory.getLogger(getClass());

    @Autowired
    protected IFileStorageManager fileStorageManager;

    @Autowired
    protected IPropertiesManager propertiesManager;

    /**
     * Downloads the content at the given URL with a GET request, sending the
     * configured access token in the {@code Authorization} header.
     *
     * @param url the URL to download from
     * @return the response body, or {@code null} if the response status is not
     *         200 OK
     */
    public byte[] downloadFile(String url) {
        RestTemplate restTemplate = new RestTemplate();
        restTemplate.getMessageConverters().add(new ByteArrayHttpMessageConverter());

        HttpHeaders headers = new HttpHeaders();
        headers.setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM));
        headers.set(
                "Authorization",
                "token "
                        + propertiesManager
                                .getProperty(IPropertiesManager.GILES_ACCESS_TOKEN));
        HttpEntity<String> entity = new HttpEntity<String>(headers);

        ResponseEntity<byte[]> response = restTemplate.exchange(url, HttpMethod.GET,
                entity, byte[].class);
        if (response.getStatusCode().equals(HttpStatus.OK)) {
            return response.getBody();
        }
        return null;
    }

    /**
     * Writes the given text to a file in the storage folder of the document
     * and describes the result as a {@link Page}.
     *
     * <p>The file name is built as {@code filename[.pageNr].extension}; the
     * page number is only included when it is greater than -1, and a leading
     * dot is added to the extension if it is missing.
     *
     * @param pageNr         page number to include in the file name, or a
     *                       value below 0 to leave it out
     * @param requestId      id of the request the document belongs to
     * @param documentId     id of the document
     * @param pageText       text to write
     * @param filename       base name of the file to create
     * @param fileExtentions file extension, with or without leading dot
     * @return the stored page (path, file name and size), or {@code null} if
     *         the file could not be created or written
     */
    protected Page saveTextToFile(int pageNr, String requestId,
            String documentId, String pageText, String filename, String fileExtentions) {
        String docFolder = fileStorageManager.getAndCreateStoragePath(requestId,
                documentId, null);

        if (pageNr > -1) {
            filename = filename + "." + pageNr;
        }

        if (!fileExtentions.startsWith(".")) {
            fileExtentions = "." + fileExtentions;
        }
        filename = filename + fileExtentions;

        String filePath = docFolder + File.separator + filename;
        File fileObject = new File(filePath);
        try {
            fileObject.createNewFile();
        } catch (IOException e) {
            logger.error("Could not create file.", e);
            return null;
        }

        // try-with-resources closes the writer (and the wrapped FileWriter)
        // even when write() fails; before, a failed write leaked the file handle.
        // NOTE(review): FileWriter encodes with the JVM's default charset, so
        // the bytes on disk depend on the runtime configuration.
        try (BufferedWriter bfWriter = new BufferedWriter(new FileWriter(fileObject))) {
            bfWriter.write(pageText);
        } catch (IOException e) {
            logger.error("Could not write text to file.", e);
            return null;
        }

        String relativePath = fileStorageManager.getFileFolderPathInBaseFolder(requestId, documentId, null);
        Page page = new Page(relativePath + File.separator + filename, filename);
        page.size = fileObject.length();
        return page;
    }

    /**
     * Returns the configured Cepheus URL without a trailing slash, so that
     * callers can append paths starting with "/".
     */
    protected String getRestEndpoint() {
        String restEndpoint = propertiesManager.getProperty(IPropertiesManager.CEPHEUS_URL);
        if (restEndpoint.endsWith("/")) {
            restEndpoint = restEndpoint.substring(0, restEndpoint.length()-1);
        }
        return restEndpoint;
    }

    /**
     * Simple holder describing a stored file: its path, file name, content
     * type and size. Only path and filename are set by the constructor.
     */
    class Page {
        public String path;
        public String filename;
        public String contentType;
        public long size;

        public Page(String path, String filename) {
            super();
            this.path = path;
            this.filename = filename;
        }
    }
}
Loading

0 comments on commit d4c455b

Please sign in to comment.