diff --git a/turing-commons/src/main/java/com/viglet/turing/commons/cache/TurCustomClassCache.java b/turing-commons/src/main/java/com/viglet/turing/commons/cache/TurCustomClassCache.java index 9cd25eb0e..77b365418 100644 --- a/turing-commons/src/main/java/com/viglet/turing/commons/cache/TurCustomClassCache.java +++ b/turing-commons/src/main/java/com/viglet/turing/commons/cache/TurCustomClassCache.java @@ -15,10 +15,16 @@ private TurCustomClassCache() { } private static final Map customClassMap = new HashMap<>(); + /** + * Retrieve an instance of a class from a static HashMap. The class will be instantiated on the HashMap if not present. + * @param className The name of the class that has to be retrieved. + * @return An Optional of the instance. + */ public static Optional getCustomClassMap(String className) { if (!customClassMap.containsKey(className)) { log.info("Custom class {} not found in memory, instancing...", className); try { + // Instantiates the class dynamically, so no compile-time import is required customClassMap.put(className, Objects.requireNonNull(Class.forName(className) .getDeclaredConstructor().newInstance())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | diff --git a/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/TurSprinklrProcess.java b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/TurSprinklrProcess.java index 2fc561bda..1fc8cdfb3 100644 --- a/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/TurSprinklrProcess.java +++ b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/TurSprinklrProcess.java @@ -17,6 +17,8 @@ import com.viglet.turing.connector.sprinklr.persistence.model.TurSprinklrAttributeMapping; import com.viglet.turing.connector.sprinklr.persistence.model.TurSprinklrSource; import 
com.viglet.turing.connector.sprinklr.persistence.repository.TurSprinklrAttributeMappingRepository; +import com.viglet.turing.connector.sprinklr.utils.FileAsset; +import com.viglet.turing.connector.sprinklr.utils.FileAssetsExtractor; import com.viglet.turing.sprinklr.client.service.kb.TurSprinklrKBService; import com.viglet.turing.sprinklr.client.service.kb.response.TurSprinklrKBSearch; import com.viglet.turing.sprinklr.client.service.kb.response.TurSprinklrSearchResult; @@ -25,7 +27,7 @@ import com.viglet.turing.sprinklr.client.service.token.TurSprinklrTokenService; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; -import org.springframework.beans.factory.annotation.Value; +import org.springframework.beans.factory.annotation.Value; // Gets the value from application.properties import org.springframework.stereotype.Component; import java.net.MalformedURLException; @@ -38,9 +40,12 @@ public class TurSprinklrProcess { private final String turingUrl; private final String turingApiKey; - private TurSNJobItems turSNJobItems = new TurSNJobItems(); private final int jobSize; private final TurSprinklrAttributeMappingRepository turSprinklrAttributeMappingRepository; + /** + * Represents the jobs that will be sent to Turing API + */ + private TurSNJobItems turSNJobItems = new TurSNJobItems(); @Inject public TurSprinklrProcess(@Value("${turing.url}") String turingUrl, @@ -53,41 +58,116 @@ public TurSprinklrProcess(@Value("${turing.url}") String turingUrl, this.turSprinklrAttributeMappingRepository = turSprinklrAttributeMappingRepository; } + private static void addItemInExistingAttribute(String attributeValue, + Map attributes, + String attributeName) { + if (attributes.get(attributeName) instanceof ArrayList) + addItemToArray(attributes, attributeName, attributeValue); + else convertAttributeSingleValueToArray(attributes, attributeName, attributeValue); + } + + private static void addItemToArray(Map attributes, String attributeName, String 
attributeValue) { + List attributeValues = new ArrayList<>(((List) attributes.get(attributeName)) + .stream().map(String.class::cast).toList()); + attributeValues.add(attributeValue); + attributes.put(attributeName, attributeValues); + + } + + private static void convertAttributeSingleValueToArray(Map attributes, + String attributeName, String attributeValue) { + List attributeValues = new ArrayList<>(); + attributeValues.add(attributes.get(attributeName)); + attributeValues.add(attributeValue); + attributes.put(attributeName, attributeValues); + } + public void start(TurSprinklrSource turSprinklrSource) { reset(); + // Index for the pagination parameter of the Knowledge Base API, it starts on 0 AtomicInteger kbPage = new AtomicInteger(0); + + // Get a token for the API TurSprinklrTokenService turSprinklrTokenService = new TurSprinklrTokenService( TurSprinklrSecretKey.builder() .apiKey(turSprinklrSource.getApiKey()) .secretKey(turSprinklrSource.getSecretKey()) .environment(turSprinklrSource.getEnvironment()) - .build()); - TurSprinklrAccessToken turSprinklrAccessToken = turSprinklrTokenService - .getAccessToken(); + .build() + ); + TurSprinklrAccessToken turSprinklrAccessToken = turSprinklrTokenService.getAccessToken(); + if (turSprinklrAccessToken != null) { while (true) { TurSprinklrKBSearch turSprinklrKBSearch = TurSprinklrKBService.run(turSprinklrAccessToken, kbPage.get()); - if (!(turSprinklrKBSearch == null)) { + + if (turSprinklrKBSearch != null) { List results = turSprinklrKBSearch.getData().getSearchResults(); + if (results.isEmpty()) { break; } else { results.forEach(searchResult -> { - getPage(turSprinklrSource, searchResult, turSprinklrAccessToken); + Locale resultLocale = searchResult.getLocale(); + Collection turSites = turSprinklrSource.getTurSNSites(); + + // Inserts new jobs into turSNJobItems + getArticle(turSprinklrSource, searchResult, turSprinklrAccessToken); + + // Gets the assets attached to the search result and inserts into turSNJobItems. 
+ List assets = getFileAssets(searchResult); + addFileAssetsToJobItens(assets, resultLocale, turSites); + + // Once turSNJobItems reaches the configured job size, send the batch to Turing. sendToTuringWhenMaxSize(); + getInfoQueue(); }); + // Increment Index kbPage.incrementAndGet(); } } } } if (turSNJobItems.size() > 0) { + // Sends the last remaining jobs. sendToTuring(); getInfoQueue(); } } + /** + * Extracts the file assets from the search result and returns a list of FileAsset objects. + */ + private List getFileAssets(TurSprinklrSearchResult searchResult) { + final var fileAssetExtractor = new FileAssetsExtractor(turingUrl, turingApiKey); + final var fileAssets = fileAssetExtractor.extractFromLinkedAssets(searchResult); + + if (fileAssets == null || fileAssets.isEmpty()) { + return Collections.emptyList(); + } + return fileAssets; + } + + /** + * Adds the file assets to the job items list. + */ + private void addFileAssetsToJobItens(List fileAssets, Locale locale, Collection turSites) { + for (var asset : fileAssets) { + var turSNJobItemAttributes = asset.toMapAttributes(); + TurSNJobItem turSNJobItem = new TurSNJobItem( + TurSNJobAction.CREATE, + (List) turSites, + locale, + turSNJobItemAttributes + ); + turSNJobItems.add(turSNJobItem); + } + } + + /** + * Clears the List of jobs in Turing API + */ private void reset() { turSNJobItems = new TurSNJobItems(); } @@ -96,13 +176,20 @@ private void getInfoQueue() { log.info("Total Job Item: {}", Iterators.size(turSNJobItems.iterator())); } - public void getPage(TurSprinklrSource turSprinklrSource, TurSprinklrSearchResult searchResult, - TurSprinklrAccessToken token) { + public void getArticle(TurSprinklrSource turSprinklrSource, TurSprinklrSearchResult searchResult, + TurSprinklrAccessToken token) { log.info("{}: {}", searchResult.getId(), turSprinklrSource.getTurSNSites()); addTurSNJobItems(turSprinklrSource, searchResult, token); } + /** + * Inserts a job in the job list parameter of this class 
(turSNJobItems) + * + * @param turSprinklrSource Source to extract the Semantic Navigation sites, locale and attributes + * @param searchResult Source to extract Locale and attributes + * @param token N/A + */ private void addTurSNJobItems(TurSprinklrSource turSprinklrSource, TurSprinklrSearchResult searchResult, TurSprinklrAccessToken token) { TurSNJobItem turSNJobItem = new TurSNJobItem(TurSNJobAction.CREATE, new ArrayList<>(turSprinklrSource.getTurSNSites()), @@ -111,8 +198,23 @@ private void addTurSNJobItems(TurSprinklrSource turSprinklrSource, TurSprinklrSe turSNJobItems.add(turSNJobItem); } + /** + * Get the Locale from inside turSprinklrSource entity, if it does not work, tries to get the locale from inside + * locale_class column of turSprinklrSource entity, if also does not work, return Locale.US. + * + * @param turSprinklrSource the method will first try to get the Locale from turSprinklrSource.getLocale(). + * @param searchResult is used to create a context where an attempt will be made to retrieve the locale. + * @param token N/A + * @return Locale based on the parameters. + */ public Locale getLocale(TurSprinklrSource turSprinklrSource, TurSprinklrSearchResult searchResult, TurSprinklrAccessToken token) { + /* + Tries to extract the locale from turSprinklrSource.getLocale() + Or else extracts the locale class (by default sprinklr-commons TurSprinklrExtLocale) from turSprinklrSource. + If getCustomClassMap found a class, converts the result to an instance of TurSprinklrExtLocaleInterface. + Then calls the .consume method to get a Locale, if none of this works, then return Locale.US + */ return Optional.ofNullable(turSprinklrSource.getLocale()) .orElseGet(() -> { if (!StringUtils.isEmpty(turSprinklrSource.getLocaleClass())) { @@ -131,22 +233,48 @@ private void sendToTuringWhenMaxSize() { } } + /** + * Returns a new HashMap of "Attribute(turing Field, Attribute Name from export.json) -> Attribute Value".

+ * This method is used when sending a job to Turing. + * + * @param turSprinklrSource Is used to find the turSprinklrAttributeMapping entity, it represents + * export.json file. + * @param token N/A + * @param searchResult If a CustomClass is defined by export.json file, the value will be extracted from searchResult + * @return the created HashMap + */ public Map getJobItemAttributes(TurSprinklrSource turSprinklrSource, TurSprinklrAccessToken token, TurSprinklrSearchResult searchResult) { Map turSNJobItemAttributes = new HashMap<>(); - turSprinklrAttributeMappingRepository.findByTurSprinklrSource(turSprinklrSource).ifPresent(source -> source.forEach(turSprinklrCustomClass -> - Optional.ofNullable(turSprinklrCustomClass.getText()).ifPresentOrElse(text -> - turSNJobItemAttributes.put(turSprinklrCustomClass.getName(), text) + /* Example + + NAME | TEXT(VALUE) + source_apps | SPRINKLR (Hardcoded) + created_date | 19-02-2015 (retrieved using a custom class) + + */ + + /* + Will retrieve the objects from 'turSprinklrAttributeMapping' entity, then for each one of the objects that this entity contains. Will get + the 'text' attribute of this object. + If the 'text' attribute is present, then will create a new key in turSNJobItemAttributes with the `name` attribute of this same object. The + value of this key will be the 'text'. + If not present, tries to get the ClassName from `AttributeMapping`, instantiate it dynamically and consumes it to get the value of the key. 
+ */ + turSprinklrAttributeMappingRepository.findByTurSprinklrSource(turSprinklrSource).ifPresent(mapping -> mapping.forEach(attribute -> + Optional.ofNullable(attribute.getText()).ifPresentOrElse(text -> + turSNJobItemAttributes.put(attribute.getName(), text) , () -> { - if (!StringUtils.isEmpty(turSprinklrCustomClass.getClassName())) - getCustomClass(searchResult, token, turSprinklrCustomClass) + // If the ClassName field is filled in export.json + if (!StringUtils.isEmpty(attribute.getClassName())) + getCustomClass(searchResult, token, attribute) .ifPresent(turMultiValue -> turMultiValue.forEach(attributeValue -> { if (!StringUtils.isBlank(attributeValue)) { - if (turSNJobItemAttributes.containsKey(turSprinklrCustomClass.getName())) { + if (turSNJobItemAttributes.containsKey(attribute.getName())) { addItemInExistingAttribute(attributeValue, - turSNJobItemAttributes, turSprinklrCustomClass.getName()); + turSNJobItemAttributes, attribute.getName()); } else { - addFirstItemToAttribute(turSprinklrCustomClass.getName(), + addFirstItemToAttribute(attribute.getName(), attributeValue, turSNJobItemAttributes); } } @@ -163,6 +291,9 @@ private Optional getCustomClass(TurSprinklrSearchResult searchRes .consume(getTurSprinklrContext(searchResult, token))); } + /** + * Builds a TurSprinklrContext object from a searchResult and a token object + */ public TurSprinklrContext getTurSprinklrContext(TurSprinklrSearchResult searchResult, TurSprinklrAccessToken token) { return TurSprinklrContext.builder() .searchResult(searchResult) @@ -170,36 +301,17 @@ public TurSprinklrContext getTurSprinklrContext(TurSprinklrSearchResult searchRe .build(); } + // Used only for getJobItemAttributes private void addFirstItemToAttribute(String attributeName, String attributeValue, Map attributes) { attributes.put(attributeName, attributeValue); } - private static void addItemInExistingAttribute(String attributeValue, - Map attributes, - String attributeName) { - if 
(attributes.get(attributeName) instanceof ArrayList) - addItemToArray(attributes, attributeName, attributeValue); - else convertAttributeSingleValueToArray(attributes, attributeName, attributeValue); - } - - private static void addItemToArray(Map attributes, String attributeName, String attributeValue) { - List attributeValues = new ArrayList<>(((List) attributes.get(attributeName)) - .stream().map(String.class::cast).toList()); - attributeValues.add(attributeValue); - attributes.put(attributeName, attributeValues); - - } - - private static void convertAttributeSingleValueToArray(Map attributes, - String attributeName, String attributeValue) { - List attributeValues = new ArrayList<>(); - attributeValues.add(attributes.get(attributeName)); - attributeValues.add(attributeValue); - attributes.put(attributeName, attributeValues); - } + /** + * Push current turSNJobItems to turing. + */ private void sendToTuring() { if (log.isDebugEnabled()) { for (TurSNJobItem turSNJobItem : turSNJobItems) { diff --git a/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/persistence/model/TurSprinklrSource.java b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/persistence/model/TurSprinklrSource.java index ba623995b..fda306a5f 100644 --- a/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/persistence/model/TurSprinklrSource.java +++ b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/persistence/model/TurSprinklrSource.java @@ -13,6 +13,9 @@ import java.util.HashSet; import java.util.Locale; +/** + * An entity that represents the connections to Sprinklr servers + */ @Builder(toBuilder = true) @AllArgsConstructor @NoArgsConstructor diff --git a/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/utils/FileAsset.java b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/utils/FileAsset.java new file mode 100644 
index 000000000..deb7d56a2 --- /dev/null +++ b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/utils/FileAsset.java @@ -0,0 +1,56 @@ +package com.viglet.turing.connector.sprinklr.utils; + +import lombok.AllArgsConstructor; +import lombok.NoArgsConstructor; + +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Represents a File Asset extracted from Sprinklr Knowledge Base search result. + * @author Gabriel F. Gomazako + * @since 0.3.9 + */ +@AllArgsConstructor +@NoArgsConstructor +public class FileAsset { + private String id; + private String filename; + private String ocrContent; + private Date indexingDate; + private Date modificationDate; + private URL url; + private long fileSize; + private String extension; + private String assetType; + private String assetCategory; + + /** + * Converts this FileAsset to an attribute map. + * @return the attribute map. 
+ */ + public Map toMapAttributes() { + var attributes = new HashMap(); + + var formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + String formatedIndexingDate = formatter.format(indexingDate); + String formatedModificationDate = formatter.format(modificationDate); + + attributes.put("id", id); + attributes.put("title", filename); + attributes.put("text", ocrContent); + attributes.put("publication_date", formatedIndexingDate); + attributes.put("modification_date", formatedModificationDate); + attributes.put("url", url); + attributes.put("filesize", fileSize); + attributes.put("extension", extension); + attributes.put("source_apps", List.of("SPRINKLR")); + attributes.put("type", "Static File"); + + return attributes; + } +} diff --git a/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/utils/FileAssetsExtractor.java b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/utils/FileAssetsExtractor.java new file mode 100644 index 000000000..24bbdc037 --- /dev/null +++ b/turing-sprinklr/sprinklr-app/src/main/java/com/viglet/turing/connector/sprinklr/utils/FileAssetsExtractor.java @@ -0,0 +1,147 @@ +package com.viglet.turing.connector.sprinklr.utils; + +import com.viglet.turing.client.auth.TurServer; +import com.viglet.turing.client.auth.credentials.TurApiKeyCredentials; +import com.viglet.turing.client.ocr.TurOcr; +import com.viglet.turing.sprinklr.client.service.kb.response.TurSprinklrAsset; +import com.viglet.turing.sprinklr.client.service.kb.response.TurSprinklrSearchResult; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.io.FileUtils; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.UUID; + + +// Named 
File extractor rather than asset extractor because linked assets may cover more than the "file-attachment" category +// https://www.sprinklr.com/help/articles/manage-assets/add-an-asset/641a8fdda1367f1be7db8255 + +/** + * Extracts File Assets from Sprinklr Knowledge Base search result. Has turing URL and turing API key just to use the + * OCR API. + * @author Gabriel F. Gomazako + * @see FileAsset + * @since 0.3.9 + */ +@Log4j2 +public class FileAssetsExtractor { + /** + * Used for Turing OCR API + */ + final String turingUrl; + /** + * Used for Turing OCR API + */ + final String turingApiKey; + + public FileAssetsExtractor(String turingUrl, String turingApiKey) { + this.turingUrl = turingUrl; + this.turingApiKey = turingApiKey; + } + + + /** + * Extract all Files and its metadata from Linked Asset key from searchResult. + * @param searchResult Knowledge Base search API result. + */ + public List extractFromLinkedAssets(TurSprinklrSearchResult searchResult) { + // Extracting LinkedAssets from the search API result of Sprinklr Knowledge Base. + List linkedAssets = searchResult.getLinkedAssets(); + + // If there are no linked assets, return an empty list. + if (linkedAssets == null || linkedAssets.isEmpty()) { + return Collections.emptyList(); + } + + List fileAssets = new ArrayList<>(); + + // For each asset, extract its information; the document is downloaded to obtain its size and date information + // OCR is also used to extract the content of the file. + for (var asset : linkedAssets) { + String id = null; + String filename = null; + String extension = null; + URL url = null; + try { + // assetId is the complete URI of the file. + URI assetURI = new URI(asset.getAssetId()); // ex: google.com/files/text.pdf. 
+ id = assetURI.getPath();// /files/text.pdf + id = id.substring(id.lastIndexOf('/') + 1); // text.pdf + + url = assetURI.toURL(); + + int lastDotIndex = id.lastIndexOf('.'); + filename = id.substring(0, lastDotIndex); + extension = id.substring(lastDotIndex + 1); + + } catch (URISyntaxException | MalformedURLException e) { + log.error(e); + } + + File downloadedFile = downloadFile(url); + String contentFromDownloadedFile = null; + // Tries to use turing OCR API to extract content from the downloaded file. + try { + log.info("Sending documento to OCR api in: {}", URI.create(turingUrl).toURL()); + log.info("file type={}", asset.getAssetType()); + + TurServer turingServer = new TurServer(URI.create(turingUrl).toURL(), new TurApiKeyCredentials(turingApiKey)); + TurOcr ocrProcessor = new TurOcr(); + contentFromDownloadedFile = ocrProcessor.processFile(turingServer, downloadedFile, false).toString(); + } catch (MalformedURLException e) { + log.error(e); + } + + Date indexingDate = new Date(); + Date modificationDate = null; + long fileSize = -1; + try { + long dateFromFile = Files.getLastModifiedTime(downloadedFile.toPath()).toMillis(); + modificationDate = new Date(dateFromFile); + fileSize = Files.size(downloadedFile.toPath()); + } catch (IOException e) { + log.error(e); + } + + var assetType = asset.getAssetType(); + var assetCategory = asset.getAssetCategory(); + + var fileAsset = new FileAsset( + ("sprinklr" + id), + filename, + contentFromDownloadedFile, + indexingDate, + modificationDate, + url, + fileSize, + extension, + assetType, + assetCategory); + + fileAssets.add(fileAsset); + } + return fileAssets; + } + + private File downloadFile(URL url) { + try { + File file = new File("/store/tmp/" + UUID.randomUUID() + ".pdf"); + FileUtils.copyURLToFile(url, file, 5000, 5000); + + return file; + + } catch (IOException e) { + log.error(e); + } + return null; + } +} diff --git 
a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocale.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocale.java index 3cd408133..82f591d47 100644 --- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocale.java +++ b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocale.java @@ -6,6 +6,9 @@ public class TurSprinklrExtLocale implements TurSprinklrExtLocaleInterface { + /** + * Gets locale from Sprinklr Context + */ @Override public Locale consume(TurSprinklrContext context) { return context.getSearchResult().getLocale(); diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocaleInterface.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocaleInterface.java index 5be9c75e4..363bf2793 100644 --- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocaleInterface.java +++ b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/ext/TurSprinklrExtLocaleInterface.java @@ -5,5 +5,8 @@ import java.util.Locale; public interface TurSprinklrExtLocaleInterface { + /** + * Do something with Sprinklr Context to return a Locale. + */ Locale consume(TurSprinklrContext context); }