From ef62853eb0021108b4ad043d3f704e487c26e0fd Mon Sep 17 00:00:00 2001 From: Carroll Date: Wed, 15 May 2024 09:22:59 -0700 Subject: [PATCH] For locale-dependent files do not verify contents with hash. Signed-off-by: Carroll --- .../ingest/attachment/TikaDocTests.java | 15 +++++++-------- .../opensearch/ingest/attachment/test/.checksums | 8 ++++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/plugins/ingest-attachment/src/test/java/org/opensearch/ingest/attachment/TikaDocTests.java b/plugins/ingest-attachment/src/test/java/org/opensearch/ingest/attachment/TikaDocTests.java index 1a272cb66af27..a022b8b9bf8b0 100644 --- a/plugins/ingest-attachment/src/test/java/org/opensearch/ingest/attachment/TikaDocTests.java +++ b/plugins/ingest-attachment/src/test/java/org/opensearch/ingest/attachment/TikaDocTests.java @@ -40,17 +40,17 @@ import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.test.OpenSearchTestCase; -import org.junit.Before; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Locale; import java.util.Map; /** * Parse sample tika documents and assert the contents has not changed according to previously recorded checksums. * Uncaught changes to tika parsing could potentially pose bwc issues. + * Note: In some cases tika will access a user's locale to inform the parsing of a file. + * The checksums of these files are left empty, and we only validate that parsed content is not null. 
*/ @SuppressFileSystems("ExtrasFS") // don't try to parse extraN public class TikaDocTests extends OpenSearchTestCase { @@ -59,11 +59,6 @@ public class TikaDocTests extends OpenSearchTestCase { static final String TIKA_FILES = "/org/opensearch/ingest/attachment/test/tika-files/"; static final String TIKA_CHECKSUMS = "/org/opensearch/ingest/attachment/test/.checksums"; - @Before - public void setLocale() { - Locale.setDefault(Locale.ENGLISH); - } - public void testParseSamples() throws Exception { String checksumJson = Files.readString(PathUtils.get(getClass().getResource(TIKA_CHECKSUMS).toURI())); Map checksums = XContentHelper.convertToMap(JsonXContent.jsonXContent, checksumJson, false); @@ -73,7 +68,11 @@ public void testParseSamples() throws Exception { String parsedContent = tryParse(doc); assertNotNull(parsedContent); assertFalse(parsedContent.isEmpty()); - assertEquals(checksums.get(doc.getFileName().toString()), DigestUtils.sha1Hex(parsedContent)); + + String check = checksums.get(doc.getFileName().toString()).toString(); + if (!check.isEmpty()) { + assertEquals(check, DigestUtils.sha1Hex(parsedContent)); + } } stream.close(); diff --git a/plugins/ingest-attachment/src/test/resources/org/opensearch/ingest/attachment/test/.checksums b/plugins/ingest-attachment/src/test/resources/org/opensearch/ingest/attachment/test/.checksums index 15480a35e2461..c0d8028a8d690 100644 --- a/plugins/ingest-attachment/src/test/resources/org/opensearch/ingest/attachment/test/.checksums +++ b/plugins/ingest-attachment/src/test/resources/org/opensearch/ingest/attachment/test/.checksums @@ -55,7 +55,7 @@ "testRTFWithCurlyBraces.rtf": "019cab63b73ff89d094823cf50c0a721bec08ee2", "testFooter.ods": "846e1d0415b23fa27631b536b0cf566abbf8fcc1", "testPPT.ppt": "933ee556884b1d9e28b801daa0d77bbaa4f4be62", - "testEXCEL-formats.xls": "3f3e2e5cd7d6527af8d15e5668dc2cf7c33b25fe", + "testEXCEL-formats.xls": "", "testPPT_masterFooter.pptx": "29bb97006b3608b7db6ff72b94d20157878d94dd", 
"testWORD_header_hyperlink.doc": "914bbec0730c54948ad307ea3e375ef0c100abf1", "testRTFHyperlink.rtf": "2b2ffb1997aa495fbab1af490d134051de168c97", @@ -112,7 +112,7 @@ "testPPT_embedded_two_slides.pptx": "0d760dbaf9d9d2f173dd40deecd0de5ecb885301", "testPDF_bookmarks.pdf": "5fc486c443511452db4f1aa6530714c6aa49c831", "test_recursive_embedded.docx": "afc32b07ce07ad273e5b3d1a43390a9d2b6dd0a9", - "testEXCEL-formats.xlsx": "801f4850a8e5dca36cd2e3544cb4e74d8f4265f5", + "testEXCEL-formats.xlsx": "", "testPPT_masterText2.pptx": "2b01eab5d0349e3cfe791b28c70c2dbf4efc884d", "test.doc": "774be3106edbb6d80be36dbb548d62401dcfa0fe", "test_recursive_embedded_npe.docx": "afc32b07ce07ad273e5b3d1a43390a9d2b6dd0a9", @@ -174,7 +174,7 @@ "testPPTX_Thumbnail.pptx": "6aa019154289317c7b7832fe46556e6d61cd0a9f", "testRTFTableCellSeparation.rtf": "5647290a3197c1855fad10201dc7be60ea7b0e42", "testRTFControls.rtf": "aee6afb80e8b09cf49f056020c037f70c2757e49", - "testEXCEL.xls": "b5b3302499974062a7b1abd4ed523e895785b702", + "testEXCEL.xls": "", "testRTFJapanese.rtf": "08976f9a7d6d3a155cad84d7fa23295cb972a17a", "testPageNumber.pdf": "96b03d2cc6782eba653af28228045964e68422b5", "testOptionalHyphen.pdf": "12edd450ea76ea4e79f80ebd3442999ec2180dbc", @@ -192,7 +192,7 @@ "testPPT.ppsm": "71333ef84f7825d8ad6aba2ba993d04b4bab41c6", "boilerplate.html": "b3558f02c3179e4aeeb6057594d87bda79964e7b", "testEXCEL_embeded.xls": "110247fc0a3936828c760e40975ff83e4578be76", - "testEXCEL.xlsx": "b39735e1498ec538615366b48dcfb67b558203b1", + "testEXCEL.xlsx": "", "testPPT_2imgs.ppt": "9a68072ffcf171389e78cf8bc018c4b568a6202d", "testComment.pptx": "6ae6052f469b8f901fd4fd8bc70f8e267255a58e", "testPDF_Version.6.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",