Skip to content

Commit

Permalink
change attributeName and add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
gLeirbag committed Sep 17, 2024
1 parent df59fda commit 6b79a53
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,16 @@ public Map<String, Object> toMapAttributes() {
String formatedModificationDate = formatter.format(modificationDate);

attributes.put("id", id);
attributes.put("filename", filename);
attributes.put("ocr_content", ocrContent);
// TODO: Change TypeDef in Solr.
attributes.put("indexing_date", formatedIndexingDate);
attributes.put("title", filename);
attributes.put("text", ocrContent);
attributes.put("publication_date", formatedIndexingDate);
attributes.put("modification_date", formatedModificationDate);
attributes.put("url", url);
attributes.put("file_size", fileSize);
attributes.put("filesize", fileSize);
attributes.put("extension", extension);
attributes.put("asset_type", assetType);
attributes.put("asset_category", assetCategory);
attributes.put("source_apps", List.of("SPRINKLR"));
attributes.put("type", "Static File");

return attributes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
/**
* Extracts File Assets from a Sprinklr Knowledge Base search result. Holds the Turing URL and Turing API key only to
* call the OCR API.
*
* @author Gabriel F. Gomazako
* @see FileAsset
* @since 0.3.9
Expand All @@ -55,10 +54,10 @@ public FileAssetsExtractor(String turingUrl, String turingApiKey) {
* @param searchResult Knowledge Base search API result.
*/
public List<FileAsset> extractFromLinkedAssets(TurSprinklrSearchResult searchResult) {
// Extraindo LinkedAssets do resultado da chamada de API de search do Knowledge Base do Sprinkler
// Extract the LinkedAssets from the Sprinklr Knowledge Base search API result.
List<TurSprinklrAsset> linkedAssets = searchResult.getLinkedAssets();

// Se não houver resultados.
// If there are no linked assets, return an empty list.
if (linkedAssets == null || linkedAssets.isEmpty()) {
return Collections.emptyList();
}
Expand All @@ -73,7 +72,7 @@ public List<FileAsset> extractFromLinkedAssets(TurSprinklrSearchResult searchRes
String extension = null;
URL url = null;
try {
// assetId em arquivos é a completa do URL do arquivo
// assetId is the complete URI of the file.
URI assetURI = new URI(asset.getAssetId()); // ex: google.com/files/text.pdf.
id = assetURI.getPath();// /files/text.pdf
id = id.substring(id.lastIndexOf('/') + 1); // text.pdf
Expand All @@ -90,7 +89,7 @@ public List<FileAsset> extractFromLinkedAssets(TurSprinklrSearchResult searchRes

File downloadedFile = downloadFile(url);
String contentFromDownloadedFile = null;
// Usa OCR para converter o arquivo para string.
// Tries to use turing OCR API to extract content from the downloaded file.
try {
log.info("Sending documento to OCR api in: {}", URI.create(turingUrl).toURL());
log.info("file type={}", asset.getAssetType());
Expand Down

0 comments on commit 6b79a53

Please sign in to comment.