From 65cd081b22a0eba96f2ff224111f96903302ab10 Mon Sep 17 00:00:00 2001
From: Omar Khasawneh
Date: Thu, 1 Jun 2023 10:52:44 -0500
Subject: [PATCH 01/57] Adding create index and document functions

Signed-off-by: Omar Khasawneh
---
 test/operations.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 test/operations.py

diff --git a/test/operations.py b/test/operations.py
new file mode 100644
index 000000000..a25ebbf90
--- /dev/null
+++ b/test/operations.py
@@ -0,0 +1,55 @@
+import requests, json
+
+class Operations:
+    @staticmethod
+    def create_index(endpoint, index_name, auth=None, data=None):
+        response = requests.put(f'{endpoint}/{index_name}', auth=auth)
+        if response.status_code != 200:
+            print('Failed to create index')
+            print(response.text)
+        else:
+            print('Created index successfully')
+            print(response.text)
+
+        pass
+
+    @staticmethod
+    def create_document(endpoint, index_name, auth=None):
+        doc_id = '7'
+        document = {
+            'title': 'Test Document',
+            'content': 'This is a sample document for testing OpenSearch.'
+        }
+        url = f'{endpoint}/{index_name}/_doc/{doc_id}'
+        headers = {'Content-Type': 'application/json'}
+
+        # Create the document
+        url = f'{endpoint}/{index_name}/_doc/{doc_id}'
+        headers = {'Content-Type': 'application/json'}
+
+        response = requests.put(url, headers=headers, data=json.dumps(document), auth=auth)
+
+        if response.status_code != 201:
+            print('Failed to create document')
+            print(response.text)
+        else:
+            print('Created document successfully')
+            print(response.text)
+        pass
+
+
+
+def main():
+
+    username = '*' # Enter master username and password
+    password = '*'
+    auth = (username, password)
+    endpoint = '*:443' # Replace * with domain endpoint
+    index = 'my_index'
+
+    Operations.create_index(endpoint, index, auth)
+    Operations.create_document(endpoint, index, auth)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 2f22e64d263f883a916201deccdf699deb733950 Mon Sep 17 00:00:00 2001
From: Omar Khasawneh
Date: Thu, 1 Jun 2023 12:02:29 -0500
Subject: [PATCH 02/57] Adding delete index function

Signed-off-by: Omar Khasawneh
---
 test/operations.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/test/operations.py b/test/operations.py
index a25ebbf90..03335e396 100644
--- a/test/operations.py
+++ b/test/operations.py
@@ -14,8 +14,20 @@ def create_index(endpoint, index_name, auth=None, data=None):
         pass
 
     @staticmethod
-    def create_document(endpoint, index_name, auth=None):
-        doc_id = '7'
+    def delete_index(endpoint, index_name, auth=None, data=None):
+        response = requests.delete(f'{endpoint}/{index_name}', auth=auth)
+        if response.status_code != 200:
+            print('Failed to delete index')
+            print(response.text)
+        else:
+            print('Deleted index successfully')
+            print(response.text)
+
+        pass
+
+
+    @staticmethod
+    def create_document(endpoint, index_name, doc_id, auth=None):
         document = {
             'title': 'Test Document',
             'content': 'This is a sample document for testing OpenSearch.'
@@ -23,10 +35,6 @@ def create_document(endpoint, index_name, auth=None): url = f'{endpoint}/{index_name}/_doc/{doc_id}' headers = {'Content-Type': 'application/json'} - # Create the document - url = f'{endpoint}/{index_name}/_doc/{doc_id}' - headers = {'Content-Type': 'application/json'} - response = requests.put(url, headers=headers, data=json.dumps(document), auth=auth) if response.status_code != 201: @@ -44,11 +52,13 @@ def main(): username = '*' # Enter master username and password password = '*' auth = (username, password) - endpoint = '*:443' # Replace * with domain endpoint + endpoint = '*' # Replace * with domain endpoint index = 'my_index' + doc_id = '7' Operations.create_index(endpoint, index, auth) - Operations.create_document(endpoint, index, auth) + Operations.create_document(endpoint, index, doc_id, auth) + Operations.delete_index(endpoint, index, auth) if __name__ == "__main__": From aedf6fef3dd4ecdd6bc7782af58e95eb749460f1 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 1 Jun 2023 12:06:27 -0500 Subject: [PATCH 03/57] Adding delete document function Signed-off-by: Omar Khasawneh --- test/operations.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/operations.py b/test/operations.py index 03335e396..27828894b 100644 --- a/test/operations.py +++ b/test/operations.py @@ -25,6 +25,17 @@ def delete_index(endpoint, index_name, auth=None, data=None): pass + @staticmethod + def delete_document(endpoint, index_name, doc_id, auth=None, data=None): + response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth) + if response.status_code != 200: + print('Failed to delete document') + print(response.text) + else: + print('Deleted document successfully') + print(response.text) + + pass @staticmethod def create_document(endpoint, index_name, doc_id, auth=None): @@ -52,14 +63,16 @@ def main(): username = '*' # Enter master username and password password = '*' auth = (username, password) - endpoint = '*' # Replace * with domain endpoint + endpoint = '*:443' # Replace * with domain endpoint index = 'my_index' doc_id = '7' Operations.create_index(endpoint, index, auth) Operations.create_document(endpoint, index, doc_id, auth) + Operations.delete_document(endpoint, index, doc_id, auth) Operations.delete_index(endpoint, index, auth) + if __name__ == "__main__": main() \ No newline at end of file From 2276b38d3f822afff25a098af30fe4793a328e2b Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 2 Jun 2023 01:55:23 -0500 Subject: [PATCH 04/57] Adds some tests Signed-off-by: Omar Khasawneh --- test/operations.py | 52 +++++++++++++++++++++++++++-------------- test/tests.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 18 deletions(-) create mode 100644 test/tests.py diff --git a/test/operations.py b/test/operations.py index 27828894b..941c9abbd 100644 --- a/test/operations.py +++ b/test/operations.py @@ -1,41 +1,57 @@ -import requests, json +from http import HTTPStatus +import requests +import json +import os + class Operations: @staticmethod def create_index(endpoint, index_name, auth=None, data=None): response = requests.put(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != 200: + if response.status_code != HTTPStatus.OK: print('Failed to create index') print(response.text) else: print('Created index successfully') print(response.text) - pass + return response + + @staticmethod + def check_index(endpoint, index_name, auth=None, data=None): + response = 
requests.head(f'{endpoint}/{index_name}', auth=auth) + if response.status_code != HTTPStatus.OK: + print('Failed to create index') + print(response.text) + else: + print('Created index successfully') + print(response.text) + + return response @staticmethod def delete_index(endpoint, index_name, auth=None, data=None): response = requests.delete(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != 200: + if response.status_code != HTTPStatus.OK: print('Failed to delete index') print(response.text) else: print('Deleted index successfully') print(response.text) - pass + return response @staticmethod def delete_document(endpoint, index_name, doc_id, auth=None, data=None): response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth) - if response.status_code != 200: + if response.status_code != HTTPStatus.OK: print('Failed to delete document') print(response.text) else: print('Deleted document successfully') print(response.text) - pass + return response @staticmethod def create_document(endpoint, index_name, doc_id, auth=None): @@ -48,31 +64,31 @@ def create_document(endpoint, index_name, doc_id, auth=None): response = requests.put(url, headers=headers, data=json.dumps(document), auth=auth) - if response.status_code != 201: + if response.status_code != HTTPStatus.CREATED: print('Failed to create document') print(response.text) else: print('Created document successfully') print(response.text) - pass + return response def main(): - username = '*' # Enter master username and password - password = '*' + username = os.getenv('USERNAME') + password = os.getenv('PASSWORD') + endpoint = os.getenv('ENDPOINT') # Dont forget port number. + auth = (username, password) - endpoint = '*:443' # Replace * with domain endpoint index = 'my_index' doc_id = '7' - Operations.create_index(endpoint, index, auth) - Operations.create_document(endpoint, index, doc_id, auth) - Operations.delete_document(endpoint, index, doc_id, auth) - Operations.delete_index(endpoint, index, auth) - + response1 = Operations.create_index(endpoint, index) + response2 = Operations.create_document(endpoint, index, doc_id, auth) + response3 = Operations.delete_document(endpoint, index, doc_id, auth) + response4 = Operations.delete_index(endpoint, index) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test/tests.py b/test/tests.py new file mode 100644 index 000000000..1e298a177 --- /dev/null +++ b/test/tests.py @@ -0,0 +1,58 @@ +from operations import Operations +from http import HTTPStatus +import unittest +import os + +# This code is probably existing as a placeholder only for testing purposes, +# final code here will be using comparator's results. Also, won't be sending request directly to target cluster, +# and will be using the replayer instead, so we're able to check comparator's results once the replayer outputs +# the triples log file. +# TODO: Add endpoints to be read from environment variable. +class MyTestCase(unittest.TestCase): + def test_index(self): + endpoint1=os.getenv('ENDPOINT_1') + endpoint2=os.getenv('ENDPOINT_2') + index="my_index" + response1 = Operations.create_index(endpoint1, index) + response2 = Operations.create_index(endpoint2, index) + self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. 
+ + response1 = Operations.delete_index(endpoint1, index) + response2 = Operations.delete_index(endpoint2, index) + self.assert_(response1.status_code==HTTPStatus.OK and response2.status_code==HTTPStatus.OK) + + def test_document(self): + endpoint1=os.getenv('ENDPOINT_1') + endpoint2=os.getenv('ENDPOINT_2') + index="my_index2" + doc_id='7' + response1 = Operations.create_index(endpoint1, index) + response2 = Operations.create_index(endpoint2, index) + self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + + + + response1 = Operations.create_document(endpoint1, index, doc_id) + response2 = Operations.create_document(endpoint2, index, doc_id) + # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. + self.assert_(response1.status_code == HTTPStatus.CREATED and response2.status_code == HTTPStatus.CREATED) + + response1 = Operations.delete_document(endpoint1, index, doc_id) + response2 = Operations.delete_document(endpoint2, index, doc_id) + self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + + response1 = Operations.delete_index(endpoint1, index) + response2 = Operations.delete_index(endpoint2, index) + # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. + self.assert_(response1.status_code==HTTPStatus.OK and response2.status_code==HTTPStatus.OK) + + def test_unsupported_transformation(self): + + pass + + def test_supported_transformation(self): + pass + +if __name__ == '__main__': + unittest.main() From 36e558ae9fb6a88b57ca151244628037de209e50 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 2 Jun 2023 14:12:12 -0500 Subject: [PATCH 05/57] Add default value for some environment variables + fixing some linting errors Signed-off-by: Omar Khasawneh --- test/operations.py | 6 +++--- test/tests.py | 24 +++++++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/test/operations.py b/test/operations.py index 941c9abbd..06fb4a04d 100644 --- a/test/operations.py +++ b/test/operations.py @@ -76,9 +76,9 @@ def create_document(endpoint, index_name, doc_id, auth=None): def main(): - username = os.getenv('USERNAME') - password = os.getenv('PASSWORD') - endpoint = os.getenv('ENDPOINT') # Dont forget port number. + username = os.getenv('USERNAME', '') + password = os.getenv('PASSWORD', '') + endpoint = os.getenv('ENDPOINT') # Dont forget port number. auth = (username, password) index = 'my_index' diff --git a/test/tests.py b/test/tests.py index 1e298a177..521bc40c3 100644 --- a/test/tests.py +++ b/test/tests.py @@ -8,11 +8,13 @@ # and will be using the replayer instead, so we're able to check comparator's results once the replayer outputs # the triples log file. # TODO: Add endpoints to be read from environment variable. 
+ + class MyTestCase(unittest.TestCase): def test_index(self): - endpoint1=os.getenv('ENDPOINT_1') - endpoint2=os.getenv('ENDPOINT_2') - index="my_index" + endpoint1 = os.getenv('ENDPOINT_1', 'http://localhost:9200') + endpoint2 = os.getenv('ENDPOINT_2') + index = "my_index" response1 = Operations.create_index(endpoint1, index) response2 = Operations.create_index(endpoint2, index) self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) @@ -20,19 +22,17 @@ def test_index(self): response1 = Operations.delete_index(endpoint1, index) response2 = Operations.delete_index(endpoint2, index) - self.assert_(response1.status_code==HTTPStatus.OK and response2.status_code==HTTPStatus.OK) + self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) def test_document(self): - endpoint1=os.getenv('ENDPOINT_1') - endpoint2=os.getenv('ENDPOINT_2') - index="my_index2" - doc_id='7' + endpoint1 = os.getenv('ENDPOINT_1', 'http://localhost:9200') + endpoint2 = os.getenv('ENDPOINT_2') + index = "my_index2" + doc_id = '7' response1 = Operations.create_index(endpoint1, index) response2 = Operations.create_index(endpoint2, index) self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) - - response1 = Operations.create_document(endpoint1, index, doc_id) response2 = Operations.create_document(endpoint2, index, doc_id) # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. @@ -45,14 +45,16 @@ def test_document(self): response1 = Operations.delete_index(endpoint1, index) response2 = Operations.delete_index(endpoint2, index) # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. 
- self.assert_(response1.status_code==HTTPStatus.OK and response2.status_code==HTTPStatus.OK) + self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) def test_unsupported_transformation(self): pass def test_supported_transformation(self): + pass + if __name__ == '__main__': unittest.main() From e59dedfdeefb06a28a68830fc3fcc491827a3e1e Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 5 Jun 2023 13:40:11 -0500 Subject: [PATCH 06/57] Addressing feedback + Actually using the proxy and replayer Signed-off-by: Omar Khasawneh --- test/operations.py | 127 ++++++++++++++++++--------------------------- test/tests.py | 118 ++++++++++++++++++++++++++++------------- 2 files changed, 132 insertions(+), 113 deletions(-) diff --git a/test/operations.py b/test/operations.py index 06fb4a04d..648f7c6c4 100644 --- a/test/operations.py +++ b/test/operations.py @@ -1,93 +1,68 @@ -from http import HTTPStatus import requests import json import os -class Operations: - @staticmethod - def create_index(endpoint, index_name, auth=None, data=None): - response = requests.put(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to create index') - print(response.text) - else: - print('Created index successfully') - print(response.text) - - return response - - @staticmethod - def check_index(endpoint, index_name, auth=None, data=None): - response = requests.head(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to create index') - print(response.text) - else: - print('Created index successfully') - print(response.text) - - return response - - @staticmethod - def delete_index(endpoint, index_name, auth=None, data=None): - response = requests.delete(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to delete index') - print(response.text) - else: - print('Deleted index successfully') - print(response.text) - - return response - - @staticmethod - def delete_document(endpoint, index_name, doc_id, auth=None, data=None): - response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to delete document') - print(response.text) - else: - print('Deleted document successfully') - print(response.text) - - return response - - @staticmethod - def create_document(endpoint, index_name, doc_id, auth=None): - document = { - 'title': 'Test Document', - 'content': 'This is a sample document for testing OpenSearch.' 
- } - url = f'{endpoint}/{index_name}/_doc/{doc_id}' - headers = {'Content-Type': 'application/json'} - - response = requests.put(url, headers=headers, data=json.dumps(document), auth=auth) - - if response.status_code != HTTPStatus.CREATED: - print('Failed to create document') - print(response.text) - else: - print('Created document successfully') - print(response.text) - - return response +def create_index(endpoint, index_name, auth=None, data=None): + response = requests.put(f'{endpoint}/{index_name}', auth=auth, verify=False) + + return response + + +def check_index(endpoint, index_name, auth=None, data=None): + response = requests.get(f'{endpoint}/{index_name}', auth=auth, verify=False) + + return response + + +def delete_index(endpoint, index_name, auth=None, data=None): + response = requests.delete(f'{endpoint}/{index_name}', auth=auth, verify=False) + + return response + + +def delete_document(endpoint, index_name, doc_id, auth=None, data=None): + response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth, verify=False) + + return response + + +def create_document(endpoint, index_name, doc_id, auth=None): + document = { + 'title': 'Test Document', + 'content': 'This is a sample document for testing OpenSearch.' + } + url = f'{endpoint}/{index_name}/_doc/{doc_id}' + headers = {'Content-Type': 'application/json'} + + response = requests.put(url, headers=headers, data=json.dumps(document), auth=auth, verify=False) + + return response + + +def get_document(endpoint, index_name, doc_id, auth=None): + url = f'{endpoint}/{index_name}/_doc/{doc_id}' + headers = {'Content-Type': 'application/json'} + + response = requests.get(url, headers=headers, auth=auth, verify=False) + + return response def main(): - username = os.getenv('USERNAME', '') - password = os.getenv('PASSWORD', '') - endpoint = os.getenv('ENDPOINT') # Dont forget port number. + username = os.getenv('USERNAME', 'admin') + password = os.getenv('PASSWORD', 'admin') + endpoint = os.getenv('ENDPOINT', 'https://localhost:9200') # Dont forget port number. auth = (username, password) index = 'my_index' doc_id = '7' - response1 = Operations.create_index(endpoint, index) - response2 = Operations.create_document(endpoint, index, doc_id, auth) - response3 = Operations.delete_document(endpoint, index, doc_id, auth) - response4 = Operations.delete_index(endpoint, index) + create_index(endpoint, index, auth) + create_document(endpoint, index, doc_id, auth) + delete_document(endpoint, index, doc_id, auth) + delete_index(endpoint, index, auth) if __name__ == "__main__": diff --git a/test/tests.py b/test/tests.py index 521bc40c3..b7adc9c0e 100644 --- a/test/tests.py +++ b/test/tests.py @@ -1,59 +1,103 @@ -from operations import Operations +from operations import create_index, check_index, create_document, delete_document, get_document, delete_index from http import HTTPStatus import unittest import os -# This code is probably existing as a placeholder only for testing purposes, -# final code here will be using comparator's results. Also, won't be sending request directly to target cluster, -# and will be using the replayer instead, so we're able to check comparator's results once the replayer outputs -# the triples log file. -# TODO: Add endpoints to be read from environment variable. +from time import sleep -class MyTestCase(unittest.TestCase): +# Tests will say which line the test failed at and what was the result of the execution, but better logging can be done. 
+ + +class E2ETests(unittest.TestCase): + def test_cleanup(self): + # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even + # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it + # never gets to the point where it gets deleted. + # The final state of the script should be cleaning up after each test run. + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) + index = "my_index" + doc_id = '7' + delete_index(source_endpoint, index, auth) + delete_document(source_endpoint, index, doc_id, auth) + def test_index(self): - endpoint1 = os.getenv('ENDPOINT_1', 'http://localhost:9200') - endpoint2 = os.getenv('ENDPOINT_2') + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) index = "my_index" - response1 = Operations.create_index(endpoint1, index) - response2 = Operations.create_index(endpoint2, index) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) - # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. - response1 = Operations.delete_index(endpoint1, index) - response2 = Operations.delete_index(endpoint2, index) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + source_response = create_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) + + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.OK) + + # TODO: check comparator's results here. + + source_response = delete_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) + # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed + # on the target cluster, so the check will find the index and return a code 200 instead of 404. + + # TODO: check comparator's results here. + + sleep(3) + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) def test_document(self): - endpoint1 = os.getenv('ENDPOINT_1', 'http://localhost:9200') - endpoint2 = os.getenv('ENDPOINT_2') - index = "my_index2" + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) + index = "my_index" doc_id = '7' - response1 = Operations.create_index(endpoint1, index) - response2 = Operations.create_index(endpoint2, index) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) - response1 = Operations.create_document(endpoint1, index, doc_id) - response2 = Operations.create_document(endpoint2, index, doc_id) - # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. 
- self.assert_(response1.status_code == HTTPStatus.CREATED and response2.status_code == HTTPStatus.CREATED) + source_response = create_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) - response1 = Operations.delete_document(endpoint1, index, doc_id) - response2 = Operations.delete_document(endpoint2, index, doc_id) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.OK) - response1 = Operations.delete_index(endpoint1, index) - response2 = Operations.delete_index(endpoint2, index) - # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + # TODO: check comparator's results here. - def test_unsupported_transformation(self): + source_response = create_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.CREATED) + # TODO: check comparator's results here. + # TODO: compare two documents below instead of just confirming they exist - pass + source_response = get_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) - def test_supported_transformation(self): + target_response = get_document(target_endpoint, index, doc_id, auth) + self.assertEqual(target_response.status_code, HTTPStatus.OK) + + source_response = delete_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) + + target_response = get_document(target_endpoint, index, doc_id, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + + source_response = delete_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) - pass + # TODO: check comparator's results here. 
+ + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + + def test_unsupported_transformation(self): + self.assertTrue(True) + + def test_supported_transformation(self): + self.assertTrue(True) if __name__ == '__main__': From 4830a7e6ec4c832ee467fafcb79a6db740fa7752 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 02:11:33 -0500 Subject: [PATCH 07/57] Adding typehints + improving tests Signed-off-by: Omar Khasawneh --- test/operations.py | 26 ++++++++++++++++++++------ test/tests.py | 16 +++++++++++++--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/test/operations.py b/test/operations.py index 648f7c6c4..36ff06e35 100644 --- a/test/operations.py +++ b/test/operations.py @@ -2,32 +2,35 @@ import json import os +from typing import Optional, Tuple -def create_index(endpoint, index_name, auth=None, data=None): + +def create_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): response = requests.put(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def check_index(endpoint, index_name, auth=None, data=None): +def check_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): response = requests.get(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def delete_index(endpoint, index_name, auth=None, data=None): +def delete_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): response = requests.delete(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def delete_document(endpoint, index_name, doc_id, auth=None, data=None): +def delete_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth, verify=False) return response -def create_document(endpoint, index_name, doc_id, auth=None): +def create_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None, + data: Optional[dict] = None): document = { 'title': 'Test Document', 'content': 'This is a sample document for testing OpenSearch.' 
@@ -40,7 +43,7 @@ def create_document(endpoint, index_name, doc_id, auth=None): return response -def get_document(endpoint, index_name, doc_id, auth=None): +def check_document(endpoint, index_name, doc_id, auth=None): url = f'{endpoint}/{index_name}/_doc/{doc_id}' headers = {'Content-Type': 'application/json'} @@ -49,6 +52,17 @@ def get_document(endpoint, index_name, doc_id, auth=None): return response +def get_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): + url = f'{endpoint}/{index_name}/_doc/{doc_id}' + headers = {'Content-Type': 'application/json'} + + response = requests.get(url, headers=headers, auth=auth, verify=False) + document = response.json() + content = document['_source'] + + return content + + def main(): username = os.getenv('USERNAME', 'admin') diff --git a/test/tests.py b/test/tests.py index b7adc9c0e..c80168326 100644 --- a/test/tests.py +++ b/test/tests.py @@ -1,4 +1,5 @@ from operations import create_index, check_index, create_document, delete_document, get_document, delete_index +from operations import check_document from http import HTTPStatus import unittest import os @@ -35,11 +36,15 @@ def test_index(self): source_response = create_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) + sleep(3) + target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. + sleep(3) + source_response = delete_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed @@ -73,16 +78,21 @@ def test_document(self): # TODO: check comparator's results here. 
# TODO: compare two documents below instead of just confirming they exist - source_response = get_document(source_endpoint, index, doc_id, auth) + source_response = check_document(source_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = get_document(target_endpoint, index, doc_id, auth) + target_response = check_document(target_endpoint, index, doc_id, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) + source_content = get_document(source_endpoint, index, doc_id, auth) + target_content = get_document(target_endpoint, index, doc_id, auth) + self.assertEqual(source_content, target_content) + source_response = delete_document(source_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = get_document(target_endpoint, index, doc_id, auth) + target_response = check_document(target_endpoint, index, doc_id, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) source_response = delete_index(source_endpoint, index, auth) From d10cbc9b0728032e16136544be1bb3919a6bb690 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 10:21:41 -0500 Subject: [PATCH 08/57] More thorough testing + Adding setup and teardown for tests Signed-off-by: Omar Khasawneh --- test/tests.py | 61 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/test/tests.py b/test/tests.py index c80168326..ea59d95e5 100644 --- a/test/tests.py +++ b/test/tests.py @@ -11,42 +11,59 @@ class E2ETests(unittest.TestCase): - def test_cleanup(self): + def setUp(self): # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it # never gets to the point where it gets deleted. # The final state of the script should be cleaning up after each test run. - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') auth = (username, password) index = "my_index" doc_id = '7' - delete_index(source_endpoint, index, auth) - delete_document(source_endpoint, index, doc_id, auth) + delete_index(proxy_endpoint, index, auth) + delete_document(proxy_endpoint, index, doc_id, auth) + + def tearDown(self): + # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even + # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it + # never gets to the point where it gets deleted. + # The final state of the script should be cleaning up after each test run. 
+ proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) + index = "my_index" + doc_id = '7' + delete_index(proxy_endpoint, index, auth) + delete_document(proxy_endpoint, index, doc_id, auth) def test_index(self): - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') auth = (username, password) index = "my_index" - source_response = create_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = create_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) sleep(3) target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. sleep(3) - source_response = delete_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = delete_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed # on the target cluster, so the check will find the index and return a code 200 instead of 404. @@ -55,9 +72,12 @@ def test_index(self): sleep(3) target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_document(self): - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') @@ -65,16 +85,18 @@ def test_document(self): index = "my_index" doc_id = '7' - source_response = create_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = create_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. - source_response = create_document(source_endpoint, index, doc_id, auth) - self.assertEqual(source_response.status_code, HTTPStatus.CREATED) + proxy_response = create_document(proxy_endpoint, index, doc_id, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.CREATED) # TODO: check comparator's results here. 
# TODO: compare two documents below instead of just confirming they exist @@ -88,20 +110,23 @@ def test_document(self): target_content = get_document(target_endpoint, index, doc_id, auth) self.assertEqual(source_content, target_content) - source_response = delete_document(source_endpoint, index, doc_id, auth) + proxy_response = delete_document(proxy_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) target_response = check_document(target_endpoint, index, doc_id, auth) - self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + source_response = check_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) - source_response = delete_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = delete_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_unsupported_transformation(self): self.assertTrue(True) From 3169fc3c710ec269cf87367194ea43b45339727f Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 10:31:07 -0500 Subject: [PATCH 09/57] Adding test numbers Signed-off-by: Omar Khasawneh --- test/tests.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/tests.py b/test/tests.py index ea59d95e5..942248298 100644 --- a/test/tests.py +++ b/test/tests.py @@ -3,9 +3,10 @@ from http import HTTPStatus import unittest import os - +import logging from time import sleep +logger = logging.getLogger(__name__) # Tests will say which line the test failed at and what was the result of the execution, but better logging can be done. 
@@ -39,7 +40,7 @@ def tearDown(self): delete_index(proxy_endpoint, index, auth) delete_document(proxy_endpoint, index, doc_id, auth) - def test_index(self): + def test_001_index(self): proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') @@ -75,7 +76,7 @@ def test_index(self): source_response = check_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) - def test_document(self): + def test_002_document(self): proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') @@ -128,10 +129,10 @@ def test_document(self): source_response = check_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) - def test_unsupported_transformation(self): + def test_003_unsupported_transformation(self): self.assertTrue(True) - def test_supported_transformation(self): + def test_004_supported_transformation(self): self.assertTrue(True) From ab2c8ed89d8d8320840d7f1b61b15e4efb4b9cc6 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 10:32:11 -0500 Subject: [PATCH 10/57] Removing old comments Signed-off-by: Omar Khasawneh --- test/tests.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/tests.py b/test/tests.py index 942248298..bc035d5eb 100644 --- a/test/tests.py +++ b/test/tests.py @@ -13,10 +13,6 @@ class E2ETests(unittest.TestCase): def setUp(self): - # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even - # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it - # never gets to the point where it gets deleted. - # The final state of the script should be cleaning up after each test run. proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') @@ -27,10 +23,6 @@ def setUp(self): delete_document(proxy_endpoint, index, doc_id, auth) def tearDown(self): - # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even - # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it - # never gets to the point where it gets deleted. - # The final state of the script should be cleaning up after each test run. proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') From a99a8707703bcac6c76ab9860c3c59cf79d9a33f Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 11:33:31 -0500 Subject: [PATCH 11/57] Adding retry logic Signed-off-by: Omar Khasawneh --- test/tests.py | 50 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/test/tests.py b/test/tests.py index bc035d5eb..99c9435ee 100644 --- a/test/tests.py +++ b/test/tests.py @@ -4,13 +4,29 @@ import unittest import os import logging -from time import sleep +import time logger = logging.getLogger(__name__) # Tests will say which line the test failed at and what was the result of the execution, but better logging can be done. 
+def retry_request(request, args=(), max_attempts=10, delay=0.5, expectedStatusCode=None): + for attempt in range(1, max_attempts + 1): + try: + result = request(*args) + if result.status_code == expectedStatusCode: + return result + else: + logger.error(f"Status code returned: {result.status_code} did not" + f" match the expected status code: {expectedStatusCode}") + except Exception: + logger.error(f"Trying again in {delay} seconds") + time.sleep(delay) + + raise Exception(f"All {max_attempts} attempts failed.") + + class E2ETests(unittest.TestCase): def setUp(self): proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') @@ -44,28 +60,28 @@ def test_001_index(self): proxy_response = create_index(proxy_endpoint, index, auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - sleep(3) - - target_response = check_index(target_endpoint, index, auth) + target_response = retry_request(check_index, args=(target_endpoint, index, auth), + expectedStatusCode=HTTPStatus.OK) self.assertEqual(target_response.status_code, HTTPStatus.OK) - source_response = check_index(source_endpoint, index, auth) + source_response = retry_request(check_index, args=(source_endpoint, index, auth), + expectedStatusCode=HTTPStatus.OK) self.assertEqual(source_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. - sleep(3) - - proxy_response = delete_index(proxy_endpoint, index, auth) + proxy_response = retry_request(delete_index, args=(proxy_endpoint, index, auth), + expectedStatusCode=HTTPStatus.OK) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed # on the target cluster, so the check will find the index and return a code 200 instead of 404. # TODO: check comparator's results here. - sleep(3) - target_response = check_index(target_endpoint, index, auth) + target_response = retry_request(check_index, args=(target_endpoint, index, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = check_index(source_endpoint, index, auth) + source_response = retry_request(check_index, args=(source_endpoint, index, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_002_document(self): @@ -106,9 +122,11 @@ def test_002_document(self): proxy_response = delete_document(proxy_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = check_document(target_endpoint, index, doc_id, auth) + target_response = retry_request(check_document, args=(target_endpoint, index, doc_id, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = check_document(source_endpoint, index, doc_id, auth) + source_response = retry_request(check_document, args=(source_endpoint, index, doc_id, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) proxy_response = delete_index(proxy_endpoint, index, auth) @@ -116,9 +134,11 @@ def test_002_document(self): # TODO: check comparator's results here. 
-        target_response = check_index(target_endpoint, index, auth)
+        target_response = retry_request(check_index, args=(target_endpoint, index, auth),
+                                        expectedStatusCode=HTTPStatus.NOT_FOUND)
         self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND)
-        source_response = check_index(source_endpoint, index, auth)
+        source_response = retry_request(check_index, args=(source_endpoint, index, auth),
+                                        expectedStatusCode=HTTPStatus.NOT_FOUND)
         self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND)
 
     def test_003_unsupported_transformation(self):
- self.assert_(response1.status_code==HTTPStatus.OK and response2.status_code==HTTPStatus.OK) + self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) def test_unsupported_transformation(self): pass def test_supported_transformation(self): + pass + if __name__ == '__main__': unittest.main() From 25ac367d9829d715a2d96138911279d7c514f4b2 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 5 Jun 2023 13:40:11 -0500 Subject: [PATCH 17/57] Addressing feedback + Actually using the proxy and replayer Signed-off-by: Omar Khasawneh --- test/operations.py | 127 ++++++++++++++++++--------------------------- test/tests.py | 118 ++++++++++++++++++++++++++++------------- 2 files changed, 132 insertions(+), 113 deletions(-) diff --git a/test/operations.py b/test/operations.py index 06fb4a04d..648f7c6c4 100644 --- a/test/operations.py +++ b/test/operations.py @@ -1,93 +1,68 @@ -from http import HTTPStatus import requests import json import os -class Operations: - @staticmethod - def create_index(endpoint, index_name, auth=None, data=None): - response = requests.put(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to create index') - print(response.text) - else: - print('Created index successfully') - print(response.text) - - return response - - @staticmethod - def check_index(endpoint, index_name, auth=None, data=None): - response = requests.head(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to create index') - print(response.text) - else: - print('Created index successfully') - print(response.text) - - return response - - @staticmethod - def delete_index(endpoint, index_name, auth=None, data=None): - response = requests.delete(f'{endpoint}/{index_name}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to delete index') - print(response.text) - else: - print('Deleted index successfully') - print(response.text) - - return response - - @staticmethod - def delete_document(endpoint, index_name, doc_id, auth=None, data=None): - response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth) - if response.status_code != HTTPStatus.OK: - print('Failed to delete document') - print(response.text) - else: - print('Deleted document successfully') - print(response.text) - - return response - - @staticmethod - def create_document(endpoint, index_name, doc_id, auth=None): - document = { - 'title': 'Test Document', - 'content': 'This is a sample document for testing OpenSearch.' 
- } - url = f'{endpoint}/{index_name}/_doc/{doc_id}' - headers = {'Content-Type': 'application/json'} - - response = requests.put(url, headers=headers, data=json.dumps(document), auth=auth) - - if response.status_code != HTTPStatus.CREATED: - print('Failed to create document') - print(response.text) - else: - print('Created document successfully') - print(response.text) - - return response +def create_index(endpoint, index_name, auth=None, data=None): + response = requests.put(f'{endpoint}/{index_name}', auth=auth, verify=False) + + return response + + +def check_index(endpoint, index_name, auth=None, data=None): + response = requests.get(f'{endpoint}/{index_name}', auth=auth, verify=False) + + return response + + +def delete_index(endpoint, index_name, auth=None, data=None): + response = requests.delete(f'{endpoint}/{index_name}', auth=auth, verify=False) + + return response + + +def delete_document(endpoint, index_name, doc_id, auth=None, data=None): + response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth, verify=False) + + return response + + +def create_document(endpoint, index_name, doc_id, auth=None): + document = { + 'title': 'Test Document', + 'content': 'This is a sample document for testing OpenSearch.' + } + url = f'{endpoint}/{index_name}/_doc/{doc_id}' + headers = {'Content-Type': 'application/json'} + + response = requests.put(url, headers=headers, data=json.dumps(document), auth=auth, verify=False) + + return response + + +def get_document(endpoint, index_name, doc_id, auth=None): + url = f'{endpoint}/{index_name}/_doc/{doc_id}' + headers = {'Content-Type': 'application/json'} + + response = requests.get(url, headers=headers, auth=auth, verify=False) + + return response def main(): - username = os.getenv('USERNAME', '') - password = os.getenv('PASSWORD', '') - endpoint = os.getenv('ENDPOINT') # Dont forget port number. + username = os.getenv('USERNAME', 'admin') + password = os.getenv('PASSWORD', 'admin') + endpoint = os.getenv('ENDPOINT', 'https://localhost:9200') # Dont forget port number. auth = (username, password) index = 'my_index' doc_id = '7' - response1 = Operations.create_index(endpoint, index) - response2 = Operations.create_document(endpoint, index, doc_id, auth) - response3 = Operations.delete_document(endpoint, index, doc_id, auth) - response4 = Operations.delete_index(endpoint, index) + create_index(endpoint, index, auth) + create_document(endpoint, index, doc_id, auth) + delete_document(endpoint, index, doc_id, auth) + delete_index(endpoint, index, auth) if __name__ == "__main__": diff --git a/test/tests.py b/test/tests.py index 521bc40c3..b7adc9c0e 100644 --- a/test/tests.py +++ b/test/tests.py @@ -1,59 +1,103 @@ -from operations import Operations +from operations import create_index, check_index, create_document, delete_document, get_document, delete_index from http import HTTPStatus import unittest import os -# This code is probably existing as a placeholder only for testing purposes, -# final code here will be using comparator's results. Also, won't be sending request directly to target cluster, -# and will be using the replayer instead, so we're able to check comparator's results once the replayer outputs -# the triples log file. -# TODO: Add endpoints to be read from environment variable. +from time import sleep -class MyTestCase(unittest.TestCase): +# Tests will say which line the test failed at and what was the result of the execution, but better logging can be done. 
+ + +class E2ETests(unittest.TestCase): + def test_cleanup(self): + # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even + # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it + # never gets to the point where it gets deleted. + # The final state of the script should be cleaning up after each test run. + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) + index = "my_index" + doc_id = '7' + delete_index(source_endpoint, index, auth) + delete_document(source_endpoint, index, doc_id, auth) + def test_index(self): - endpoint1 = os.getenv('ENDPOINT_1', 'http://localhost:9200') - endpoint2 = os.getenv('ENDPOINT_2') + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) index = "my_index" - response1 = Operations.create_index(endpoint1, index) - response2 = Operations.create_index(endpoint2, index) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) - # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. - response1 = Operations.delete_index(endpoint1, index) - response2 = Operations.delete_index(endpoint2, index) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + source_response = create_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) + + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.OK) + + # TODO: check comparator's results here. + + source_response = delete_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) + # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed + # on the target cluster, so the check will find the index and return a code 200 instead of 404. + + # TODO: check comparator's results here. + + sleep(3) + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) def test_document(self): - endpoint1 = os.getenv('ENDPOINT_1', 'http://localhost:9200') - endpoint2 = os.getenv('ENDPOINT_2') - index = "my_index2" + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) + index = "my_index" doc_id = '7' - response1 = Operations.create_index(endpoint1, index) - response2 = Operations.create_index(endpoint2, index) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) - response1 = Operations.create_document(endpoint1, index, doc_id) - response2 = Operations.create_document(endpoint2, index, doc_id) - # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. 
- self.assert_(response1.status_code == HTTPStatus.CREATED and response2.status_code == HTTPStatus.CREATED) + source_response = create_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) - response1 = Operations.delete_document(endpoint1, index, doc_id) - response2 = Operations.delete_document(endpoint2, index, doc_id) - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.OK) - response1 = Operations.delete_index(endpoint1, index) - response2 = Operations.delete_index(endpoint2, index) - # TODO: check comparator's results here, and add more logging to know where exactly the test fails, if it does. - self.assert_(response1.status_code == HTTPStatus.OK and response2.status_code == HTTPStatus.OK) + # TODO: check comparator's results here. - def test_unsupported_transformation(self): + source_response = create_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.CREATED) + # TODO: check comparator's results here. + # TODO: compare two documents below instead of just confirming they exist - pass + source_response = get_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) - def test_supported_transformation(self): + target_response = get_document(target_endpoint, index, doc_id, auth) + self.assertEqual(target_response.status_code, HTTPStatus.OK) + + source_response = delete_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) + + target_response = get_document(target_endpoint, index, doc_id, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + + source_response = delete_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) - pass + # TODO: check comparator's results here. 
+ + target_response = check_index(target_endpoint, index, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + + def test_unsupported_transformation(self): + self.assertTrue(True) + + def test_supported_transformation(self): + self.assertTrue(True) if __name__ == '__main__': From c0e7fc2444faad520abfefd85ed82d1437f2784c Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 02:11:33 -0500 Subject: [PATCH 18/57] Adding typehints + improving tests Signed-off-by: Omar Khasawneh --- test/operations.py | 26 ++++++++++++++++++++------ test/tests.py | 16 +++++++++++++--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/test/operations.py b/test/operations.py index 648f7c6c4..36ff06e35 100644 --- a/test/operations.py +++ b/test/operations.py @@ -2,32 +2,35 @@ import json import os +from typing import Optional, Tuple -def create_index(endpoint, index_name, auth=None, data=None): + +def create_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): response = requests.put(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def check_index(endpoint, index_name, auth=None, data=None): +def check_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): response = requests.get(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def delete_index(endpoint, index_name, auth=None, data=None): +def delete_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): response = requests.delete(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def delete_document(endpoint, index_name, doc_id, auth=None, data=None): +def delete_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): response = requests.delete(f'{endpoint}/{index_name}/_doc/{doc_id}', auth=auth, verify=False) return response -def create_document(endpoint, index_name, doc_id, auth=None): +def create_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None, + data: Optional[dict] = None): document = { 'title': 'Test Document', 'content': 'This is a sample document for testing OpenSearch.' 
@@ -40,7 +43,7 @@ def create_document(endpoint, index_name, doc_id, auth=None): return response -def get_document(endpoint, index_name, doc_id, auth=None): +def check_document(endpoint, index_name, doc_id, auth=None): url = f'{endpoint}/{index_name}/_doc/{doc_id}' headers = {'Content-Type': 'application/json'} @@ -49,6 +52,17 @@ def get_document(endpoint, index_name, doc_id, auth=None): return response +def get_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): + url = f'{endpoint}/{index_name}/_doc/{doc_id}' + headers = {'Content-Type': 'application/json'} + + response = requests.get(url, headers=headers, auth=auth, verify=False) + document = response.json() + content = document['_source'] + + return content + + def main(): username = os.getenv('USERNAME', 'admin') diff --git a/test/tests.py b/test/tests.py index b7adc9c0e..c80168326 100644 --- a/test/tests.py +++ b/test/tests.py @@ -1,4 +1,5 @@ from operations import create_index, check_index, create_document, delete_document, get_document, delete_index +from operations import check_document from http import HTTPStatus import unittest import os @@ -35,11 +36,15 @@ def test_index(self): source_response = create_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) + sleep(3) + target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. + sleep(3) + source_response = delete_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed @@ -73,16 +78,21 @@ def test_document(self): # TODO: check comparator's results here. 
# TODO: compare two documents below instead of just confirming they exist - source_response = get_document(source_endpoint, index, doc_id, auth) + source_response = check_document(source_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = get_document(target_endpoint, index, doc_id, auth) + target_response = check_document(target_endpoint, index, doc_id, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) + source_content = get_document(source_endpoint, index, doc_id, auth) + target_content = get_document(target_endpoint, index, doc_id, auth) + self.assertEqual(source_content, target_content) + source_response = delete_document(source_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = get_document(target_endpoint, index, doc_id, auth) + target_response = check_document(target_endpoint, index, doc_id, auth) + self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) source_response = delete_index(source_endpoint, index, auth) From 01e606b4402beb3c77d47120f53a017f7d5c389d Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 10:21:41 -0500 Subject: [PATCH 19/57] More thorough testing + Adding setup and teardown for tests Signed-off-by: Omar Khasawneh --- test/tests.py | 61 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/test/tests.py b/test/tests.py index c80168326..ea59d95e5 100644 --- a/test/tests.py +++ b/test/tests.py @@ -11,42 +11,59 @@ class E2ETests(unittest.TestCase): - def test_cleanup(self): + def setUp(self): # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it # never gets to the point where it gets deleted. # The final state of the script should be cleaning up after each test run. - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') auth = (username, password) index = "my_index" doc_id = '7' - delete_index(source_endpoint, index, auth) - delete_document(source_endpoint, index, doc_id, auth) + delete_index(proxy_endpoint, index, auth) + delete_document(proxy_endpoint, index, doc_id, auth) + + def tearDown(self): + # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even + # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it + # never gets to the point where it gets deleted. + # The final state of the script should be cleaning up after each test run. 
+ proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + username = os.getenv('username', 'admin') + password = os.getenv('password', 'admin') + auth = (username, password) + index = "my_index" + doc_id = '7' + delete_index(proxy_endpoint, index, auth) + delete_document(proxy_endpoint, index, doc_id, auth) def test_index(self): - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') auth = (username, password) index = "my_index" - source_response = create_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = create_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) sleep(3) target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. sleep(3) - source_response = delete_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = delete_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed # on the target cluster, so the check will find the index and return a code 200 instead of 404. @@ -55,9 +72,12 @@ def test_index(self): sleep(3) target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_document(self): - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') + source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') @@ -65,16 +85,18 @@ def test_document(self): index = "my_index" doc_id = '7' - source_response = create_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = create_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. - source_response = create_document(source_endpoint, index, doc_id, auth) - self.assertEqual(source_response.status_code, HTTPStatus.CREATED) + proxy_response = create_document(proxy_endpoint, index, doc_id, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.CREATED) # TODO: check comparator's results here. 
# TODO: compare two documents below instead of just confirming they exist @@ -88,20 +110,23 @@ def test_document(self): target_content = get_document(target_endpoint, index, doc_id, auth) self.assertEqual(source_content, target_content) - source_response = delete_document(source_endpoint, index, doc_id, auth) + proxy_response = delete_document(proxy_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) target_response = check_document(target_endpoint, index, doc_id, auth) - self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + source_response = check_document(source_endpoint, index, doc_id, auth) + self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) - source_response = delete_index(source_endpoint, index, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = delete_index(proxy_endpoint, index, auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. target_response = check_index(target_endpoint, index, auth) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) + source_response = check_index(source_endpoint, index, auth) + self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_unsupported_transformation(self): self.assertTrue(True) From 7628601b7ec637c42a70f8a923fb5e7a55b037ac Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 10:31:07 -0500 Subject: [PATCH 20/57] Adding test numbers Signed-off-by: Omar Khasawneh --- test/tests.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/tests.py b/test/tests.py index ea59d95e5..942248298 100644 --- a/test/tests.py +++ b/test/tests.py @@ -3,9 +3,10 @@ from http import HTTPStatus import unittest import os - +import logging from time import sleep +logger = logging.getLogger(__name__) # Tests will say which line the test failed at and what was the result of the execution, but better logging can be done. 
@@ -39,7 +40,7 @@ def tearDown(self): delete_index(proxy_endpoint, index, auth) delete_document(proxy_endpoint, index, doc_id, auth) - def test_index(self): + def test_001_index(self): proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') @@ -75,7 +76,7 @@ def test_index(self): source_response = check_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) - def test_document(self): + def test_002_document(self): proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') @@ -128,10 +129,10 @@ def test_document(self): source_response = check_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) - def test_unsupported_transformation(self): + def test_003_unsupported_transformation(self): self.assertTrue(True) - def test_supported_transformation(self): + def test_004_supported_transformation(self): self.assertTrue(True) From 23745acce92b302dbd51872a4b86998075ed3f6e Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 10:32:11 -0500 Subject: [PATCH 21/57] Removing old comments Signed-off-by: Omar Khasawneh --- test/tests.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/tests.py b/test/tests.py index 942248298..bc035d5eb 100644 --- a/test/tests.py +++ b/test/tests.py @@ -13,10 +13,6 @@ class E2ETests(unittest.TestCase): def setUp(self): - # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even - # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it - # never gets to the point where it gets deleted. - # The final state of the script should be cleaning up after each test run. proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') @@ -27,10 +23,6 @@ def setUp(self): delete_document(proxy_endpoint, index, doc_id, auth) def tearDown(self): - # This is not necessarily a test, but if a test fails for whatever reason, it can cause next tests to fail even - # though they could've passed, due to a previous test e.g failing at a point where it creates the index, but it - # never gets to the point where it gets deleted. - # The final state of the script should be cleaning up after each test run. proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') username = os.getenv('username', 'admin') password = os.getenv('password', 'admin') From e36530595a30dd0cc3642d2a89c9f41f698dce82 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 7 Jun 2023 11:33:31 -0500 Subject: [PATCH 22/57] Adding retry logic Signed-off-by: Omar Khasawneh --- test/tests.py | 50 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/test/tests.py b/test/tests.py index bc035d5eb..99c9435ee 100644 --- a/test/tests.py +++ b/test/tests.py @@ -4,13 +4,29 @@ import unittest import os import logging -from time import sleep +import time logger = logging.getLogger(__name__) # Tests will say which line the test failed at and what was the result of the execution, but better logging can be done. 
+def retry_request(request, args=(), max_attempts=10, delay=0.5, expectedStatusCode=None): + for attempt in range(1, max_attempts + 1): + try: + result = request(*args) + if result.status_code == expectedStatusCode: + return result + else: + logger.error(f"Status code returned: {result.status_code} did not" + f" match the expected status code: {expectedStatusCode}") + except Exception: + logger.error(f"Trying again in {delay} seconds") + time.sleep(delay) + + raise Exception(f"All {max_attempts} attempts failed.") + + class E2ETests(unittest.TestCase): def setUp(self): proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') @@ -44,28 +60,28 @@ def test_001_index(self): proxy_response = create_index(proxy_endpoint, index, auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - sleep(3) - - target_response = check_index(target_endpoint, index, auth) + target_response = retry_request(check_index, args=(target_endpoint, index, auth), + expectedStatusCode=HTTPStatus.OK) self.assertEqual(target_response.status_code, HTTPStatus.OK) - source_response = check_index(source_endpoint, index, auth) + source_response = retry_request(check_index, args=(source_endpoint, index, auth), + expectedStatusCode=HTTPStatus.OK) self.assertEqual(source_response.status_code, HTTPStatus.OK) # TODO: check comparator's results here. - sleep(3) - - proxy_response = delete_index(proxy_endpoint, index, auth) + proxy_response = retry_request(delete_index, args=(proxy_endpoint, index, auth), + expectedStatusCode=HTTPStatus.OK) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed # on the target cluster, so the check will find the index and return a code 200 instead of 404. # TODO: check comparator's results here. - sleep(3) - target_response = check_index(target_endpoint, index, auth) + target_response = retry_request(check_index, args=(target_endpoint, index, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = check_index(source_endpoint, index, auth) + source_response = retry_request(check_index, args=(source_endpoint, index, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_002_document(self): @@ -106,9 +122,11 @@ def test_002_document(self): proxy_response = delete_document(proxy_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = check_document(target_endpoint, index, doc_id, auth) + target_response = retry_request(check_document, args=(target_endpoint, index, doc_id, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = check_document(source_endpoint, index, doc_id, auth) + source_response = retry_request(check_document, args=(source_endpoint, index, doc_id, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) proxy_response = delete_index(proxy_endpoint, index, auth) @@ -116,9 +134,11 @@ def test_002_document(self): # TODO: check comparator's results here. 
- target_response = check_index(target_endpoint, index, auth) + target_response = retry_request(check_index, args=(target_endpoint, index, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = check_index(source_endpoint, index, auth) + source_response = retry_request(check_index, args=(source_endpoint, index, auth), + expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_003_unsupported_transformation(self): From 7483ab93c78eb9c19120fea5075e6ede35b92297 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 9 Jun 2023 18:03:33 -0500 Subject: [PATCH 23/57] Adding test for jupyter notebook + removing no longer neccessary code Signed-off-by: Omar Khasawneh --- test/operations.py | 22 ---------------------- test/tests.py | 29 ++++++++++++----------------- 2 files changed, 12 insertions(+), 39 deletions(-) diff --git a/test/operations.py b/test/operations.py index 36ff06e35..cdf4f1e4d 100644 --- a/test/operations.py +++ b/test/operations.py @@ -1,7 +1,5 @@ import requests import json -import os - from typing import Optional, Tuple @@ -61,23 +59,3 @@ def get_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tup content = document['_source'] return content - - -def main(): - - username = os.getenv('USERNAME', 'admin') - password = os.getenv('PASSWORD', 'admin') - endpoint = os.getenv('ENDPOINT', 'https://localhost:9200') # Dont forget port number. - - auth = (username, password) - index = 'my_index' - doc_id = '7' - - create_index(endpoint, index, auth) - create_document(endpoint, index, doc_id, auth) - delete_document(endpoint, index, doc_id, auth) - delete_index(endpoint, index, auth) - - -if __name__ == "__main__": - main() diff --git a/test/tests.py b/test/tests.py index 99c9435ee..6eee5ec68 100644 --- a/test/tests.py +++ b/test/tests.py @@ -5,6 +5,7 @@ import os import logging import time +import requests logger = logging.getLogger(__name__) @@ -67,15 +68,9 @@ def test_001_index(self): expectedStatusCode=HTTPStatus.OK) self.assertEqual(source_response.status_code, HTTPStatus.OK) - # TODO: check comparator's results here. - proxy_response = retry_request(delete_index, args=(proxy_endpoint, index, auth), expectedStatusCode=HTTPStatus.OK) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - # Add a stall here maybe? Sometimes the check index function is performed before the delete request is replayed - # on the target cluster, so the check will find the index and return a code 200 instead of 404. - - # TODO: check comparator's results here. target_response = retry_request(check_index, args=(target_endpoint, index, auth), expectedStatusCode=HTTPStatus.NOT_FOUND) @@ -94,6 +89,7 @@ def test_002_document(self): index = "my_index" doc_id = '7' + # Creating an index, then asserting that the index was created on both targets. proxy_response = create_index(proxy_endpoint, index, auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) @@ -102,12 +98,9 @@ def test_002_document(self): source_response = check_index(source_endpoint, index, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - # TODO: check comparator's results here. - + # Creating a document, then asserting that the document was created on both targets. proxy_response = create_document(proxy_endpoint, index, doc_id, auth) self.assertEqual(proxy_response.status_code, HTTPStatus.CREATED) - # TODO: check comparator's results here. 
- # TODO: compare two documents below instead of just confirming they exist source_response = check_document(source_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) @@ -115,10 +108,12 @@ def test_002_document(self): target_response = check_document(target_endpoint, index, doc_id, auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) + # Comparing the document's content on both targets, asserting that they match. source_content = get_document(source_endpoint, index, doc_id, auth) target_content = get_document(target_endpoint, index, doc_id, auth) self.assertEqual(source_content, target_content) + # Deleting the document that was created then asserting that it was deleted on both targets. proxy_response = delete_document(proxy_endpoint, index, doc_id, auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) @@ -129,11 +124,10 @@ def test_002_document(self): expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) + # Deleting the index that was created then asserting that it was deleted on both targets. proxy_response = delete_index(proxy_endpoint, index, auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - # TODO: check comparator's results here. - target_response = retry_request(check_index, args=(target_endpoint, index, auth), expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) @@ -141,12 +135,13 @@ def test_002_document(self): expectedStatusCode=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) - def test_003_unsupported_transformation(self): - self.assertTrue(True) - - def test_004_supported_transformation(self): - self.assertTrue(True) + def test_003_jupyterAwake(self): + # Making sure that the Jupyter notebook is up and can be reached. + jupyter_endpoint = os.getenv('JUPYTER_NOTEBOOK', 'http://localhost:8888/api') + response = requests.get(jupyter_endpoint) + self.assertEqual(response.status_code, HTTPStatus.OK) if __name__ == '__main__': unittest.main() + From 7b37e05cab1a28e8fe118807342d9bbf0b6907de Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 12 Jun 2023 03:34:16 -0500 Subject: [PATCH 24/57] Add setup.py and requirements file + fix a lint error. 
Signed-off-by: Omar Khasawneh --- test/requirements.txt | 9 +++++++++ test/setup.py | 14 ++++++++++++++ test/tests.py | 4 ---- 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 test/requirements.txt create mode 100644 test/setup.py diff --git a/test/requirements.txt b/test/requirements.txt new file mode 100644 index 000000000..3a293bea2 --- /dev/null +++ b/test/requirements.txt @@ -0,0 +1,9 @@ +certifi==2023.5.7 +charset-normalizer==3.1.0 +idna==3.4 +iniconfig==2.0.0 +packaging==23.1 +pluggy==1.0.0 +pytest==7.3.1 +requests==2.31.0 +urllib3==2.0.3 diff --git a/test/setup.py b/test/setup.py new file mode 100644 index 000000000..abf7af835 --- /dev/null +++ b/test/setup.py @@ -0,0 +1,14 @@ +import setuptools + +with open("requirements.txt") as f: + required_packages = f.read().splitlines() + +setuptools.setup( + name='integ_test', + version='0.1', + description='End-to-End integrations test', + author='OpenSearch Migrations', + packages=setuptools.find_packages(), + python_requires=">=3.9", + install_requires=required_packages, +) \ No newline at end of file diff --git a/test/tests.py b/test/tests.py index 6eee5ec68..177f42aab 100644 --- a/test/tests.py +++ b/test/tests.py @@ -141,7 +141,3 @@ def test_003_jupyterAwake(self): response = requests.get(jupyter_endpoint) self.assertEqual(response.status_code, HTTPStatus.OK) - -if __name__ == '__main__': - unittest.main() - From 65553d33bb61547c004acdf924bada3d6991477e Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 12 Jun 2023 04:02:16 -0500 Subject: [PATCH 25/57] Update readme file Signed-off-by: Omar Khasawneh --- README.md | 12 ++++++++++++ test/setup.py | 2 +- test/tests.py | 1 - 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 04b622ce8..0335a5027 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,18 @@ This repo will contain code and documentation to assist in migrations and upgrad Developers must run the "install_githooks.sh" script in order to add the pre-commit hook. +## End-to-End Testing + +Developers can run a test script which will verify the end-to-end solution. + +To run the test script, users must navigate to the test directory, install the required packages then run the script: + +``` +cd test +pip install -r requirements.txt +pytest tests.py +``` + ## Security See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 
diff --git a/test/setup.py b/test/setup.py index abf7af835..d29f76432 100644 --- a/test/setup.py +++ b/test/setup.py @@ -11,4 +11,4 @@ packages=setuptools.find_packages(), python_requires=">=3.9", install_requires=required_packages, -) \ No newline at end of file +) diff --git a/test/tests.py b/test/tests.py index 177f42aab..84d9085aa 100644 --- a/test/tests.py +++ b/test/tests.py @@ -140,4 +140,3 @@ def test_003_jupyterAwake(self): jupyter_endpoint = os.getenv('JUPYTER_NOTEBOOK', 'http://localhost:8888/api') response = requests.get(jupyter_endpoint) self.assertEqual(response.status_code, HTTPStatus.OK) - From 3008832c040446bb9a453ab37791d44ea331ab1f Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 12 Jun 2023 17:00:39 -0500 Subject: [PATCH 26/57] Add readme to TrafficCapture - compatiblity Signed-off-by: Omar Khasawneh --- TrafficCapture/README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 TrafficCapture/README.md diff --git a/TrafficCapture/README.md b/TrafficCapture/README.md new file mode 100644 index 000000000..c6a20dfef --- /dev/null +++ b/TrafficCapture/README.md @@ -0,0 +1,22 @@ +## Compatibility + +Must have Java version 11 installed. + +The tools in this directory can only be built if you have Java version 11 installed. + +The version is specified in `TrafficCapture/build.gradle` using a Java toolchain, which allows us +to decouple the Java version used by Gradle itself from Java version used by the tools here. + +Any attempt to use a different version will cause the build to fail and will result in the following error (or similar) +depending on which tool/project is being built. The below example shows the error printed when running e.g `./gradlew +trafficCaptureProxyServer:build` + +``` +* What went wrong: +A problem occurred evaluating project ':trafficCaptureProxyServer'. +> Could not resolve all dependencies for configuration ':trafficCaptureProxyServer:opensearchSecurityPlugin'. + > Failed to calculate the value of task ':trafficCaptureProxyServer:compileJava' property 'javaCompiler'. + > No matching toolchains found for requested specification: {languageVersion=10, vendor=any, implementation=vendor-specific}. + > No locally installed toolchains match (see https://docs.gradle.org/8.0.2/userguide/toolchains.html#sec:auto_detection) and toolchain download repositories have not been configured (see https://docs.gradle.org/8.0.2/userguide/toolchains.html#sub:download_repositories). + +``` From 10a1986a32fac7f4cd46d899b10ee248ce7babe1 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Tue, 13 Jun 2023 13:36:40 -0500 Subject: [PATCH 27/57] Fixed the delay code so it actually sleeps while retrying requests Signed-off-by: Omar Khasawneh --- test/tests.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/tests.py b/test/tests.py index 84d9085aa..4a82546a2 100644 --- a/test/tests.py +++ b/test/tests.py @@ -19,13 +19,14 @@ def retry_request(request, args=(), max_attempts=10, delay=0.5, expectedStatusCo if result.status_code == expectedStatusCode: return result else: - logger.error(f"Status code returned: {result.status_code} did not" - f" match the expected status code: {expectedStatusCode}") + logger.warning(f"Status code returned: {result.status_code} did not" + f" match the expected status code: {expectedStatusCode}." 
+ f" Trying again in {delay} seconds.") + time.sleep(delay) except Exception: - logger.error(f"Trying again in {delay} seconds") - time.sleep(delay) - - raise Exception(f"All {max_attempts} attempts failed.") + logger.error(f"All {max_attempts} attempts failed.") + raise Exception(f"Couldn't get the expected status code: {expectedStatusCode} even after attempting to" + f"retry the request {max_attempts} times.") class E2ETests(unittest.TestCase): From 3fe174d9d3a0f2091ecb4b4791963eb588244156 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Tue, 13 Jun 2023 14:11:50 -0500 Subject: [PATCH 28/57] Add instructions for running the docker solution Signed-off-by: Omar Khasawneh --- TrafficCapture/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/TrafficCapture/README.md b/TrafficCapture/README.md index c6a20dfef..5436db474 100644 --- a/TrafficCapture/README.md +++ b/TrafficCapture/README.md @@ -1,3 +1,9 @@ +## Running the Docker Solution + +While in the TrafficCapture directory, run the following command: + +`./gradlew :dockerSolution:composeUp` + ## Compatibility Must have Java version 11 installed. From 74fbc9fd1f86ae9d39670ee9789133cc99db6498 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Wed, 14 Jun 2023 15:25:19 -0500 Subject: [PATCH 29/57] Link docker solution readme from readme in root of repo Signed-off-by: Omar Khasawneh --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 04b622ce8..e5fd7d1ea 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,14 @@ This repo will contain code and documentation to assist in migrations and upgrad Developers must run the "install_githooks.sh" script in order to add the pre-commit hook. +## Docker Solution + +The TrafficCapture directory hosts a set of projects designed to facilitate the proxying and capturing of HTTP +traffic, which can then be offloaded and replayed to other HTTP server(s). + +More documentation on this solution can be found here: +[TrafficCapture README](TrafficCapture/README.md) + ## Security See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 
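As a quick reference for the Docker instructions introduced in the two README patches above, a minimal sketch of a local workflow might look like the following. Only the `composeUp` task comes from the TrafficCapture README itself; the `docker ps` check and the `composeDown` teardown task are assumptions based on standard Docker CLI and Gradle Docker Compose plugin conventions, not commands shown in these patches.

```
cd TrafficCapture
./gradlew :dockerSolution:composeUp    # start the containers for the solution
docker ps                              # assumed check: list running containers before starting the tests
./gradlew :dockerSolution:composeDown  # assumed teardown task from the Gradle Docker Compose plugin
```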
From 7593791d71cb6fdd7a0f0c4b495fe2cbfb700799 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 15 Jun 2023 11:39:56 -0500 Subject: [PATCH 30/57] Refactored retry function + added common functionality function + removed get document function Signed-off-by: Omar Khasawneh --- test/operations.py | 22 ++----- test/tests.py | 153 +++++++++++++++++++++------------------------ 2 files changed, 76 insertions(+), 99 deletions(-) diff --git a/test/operations.py b/test/operations.py index cdf4f1e4d..019ac0446 100644 --- a/test/operations.py +++ b/test/operations.py @@ -3,19 +3,19 @@ from typing import Optional, Tuple -def create_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): +def create_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None): response = requests.put(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def check_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): +def check_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None): response = requests.get(f'{endpoint}/{index_name}', auth=auth, verify=False) return response -def delete_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None, data: Optional[dict] = None): +def delete_index(endpoint: str, index_name: str, auth: Optional[Tuple[str, str]] = None): response = requests.delete(f'{endpoint}/{index_name}', auth=auth, verify=False) return response @@ -27,8 +27,7 @@ def delete_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[ return response -def create_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None, - data: Optional[dict] = None): +def create_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): document = { 'title': 'Test Document', 'content': 'This is a sample document for testing OpenSearch.' @@ -41,21 +40,10 @@ def create_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[ return response -def check_document(endpoint, index_name, doc_id, auth=None): +def check_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): url = f'{endpoint}/{index_name}/_doc/{doc_id}' headers = {'Content-Type': 'application/json'} response = requests.get(url, headers=headers, auth=auth, verify=False) return response - - -def get_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): - url = f'{endpoint}/{index_name}/_doc/{doc_id}' - headers = {'Content-Type': 'application/json'} - - response = requests.get(url, headers=headers, auth=auth, verify=False) - document = response.json() - content = document['_source'] - - return content diff --git a/test/tests.py b/test/tests.py index 4a82546a2..6fbe7c3bf 100644 --- a/test/tests.py +++ b/test/tests.py @@ -1,6 +1,7 @@ -from operations import create_index, check_index, create_document, delete_document, get_document, delete_index -from operations import check_document +from operations import create_index, check_index, create_document,\ + delete_document, delete_index, check_document from http import HTTPStatus +from typing import Tuple, Callable import unittest import os import logging @@ -9,131 +10,119 @@ logger = logging.getLogger(__name__) -# Tests will say which line the test failed at and what was the result of the execution, but better logging can be done. 
+def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, delay: float = 0.5, expected_status_code: HTTPStatus = None): + for attempt in range(1, max_attempts + 1): + result = request(*args) + if result.status_code == expected_status_code: + return result + else: + logger.warning(f"Status code returned: {result.status_code} did not" + f" match the expected status code: {expected_status_code}." + f" Trying again in {delay} seconds.") + time.sleep(delay) -def retry_request(request, args=(), max_attempts=10, delay=0.5, expectedStatusCode=None): - for attempt in range(1, max_attempts + 1): - try: - result = request(*args) - if result.status_code == expectedStatusCode: - return result - else: - logger.warning(f"Status code returned: {result.status_code} did not" - f" match the expected status code: {expectedStatusCode}." - f" Trying again in {delay} seconds.") - time.sleep(delay) - except Exception: - logger.error(f"All {max_attempts} attempts failed.") - raise Exception(f"Couldn't get the expected status code: {expectedStatusCode} even after attempting to" - f"retry the request {max_attempts} times.") + logger.error(f"All {max_attempts} attempts failed.") + logger.error(f"Couldn't get the expected status code: {expected_status_code} while making the request:" + f"{request.__name__} using the following arguments: {args} ") class E2ETests(unittest.TestCase): + def common_functionality(self): + self.proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + self.source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') + self.target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') + self.username = os.getenv('username', 'admin') + self.password = os.getenv('password', 'admin') + self.auth = (self.username, self.password) + self.index = "my_index" + self.doc_id = '7' + + def setUp(self): - proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') - username = os.getenv('username', 'admin') - password = os.getenv('password', 'admin') - auth = (username, password) - index = "my_index" - doc_id = '7' - delete_index(proxy_endpoint, index, auth) - delete_document(proxy_endpoint, index, doc_id, auth) + self.common_functionality() + delete_index(self.proxy_endpoint, self.index, self.auth) + delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) def tearDown(self): - proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') - username = os.getenv('username', 'admin') - password = os.getenv('password', 'admin') - auth = (username, password) - index = "my_index" - doc_id = '7' - delete_index(proxy_endpoint, index, auth) - delete_document(proxy_endpoint, index, doc_id, auth) + self.common_functionality() + delete_index(self.proxy_endpoint, self.index, self.auth) + delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) def test_001_index(self): - proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') - target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') - username = os.getenv('username', 'admin') - password = os.getenv('password', 'admin') - auth = (username, password) - index = "my_index" - - proxy_response = create_index(proxy_endpoint, index, auth) + + proxy_response = create_index(self.proxy_endpoint, self.index, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - target_response = retry_request(check_index, args=(target_endpoint, index, 
auth), - expectedStatusCode=HTTPStatus.OK) + target_response = retry_request(check_index, args=(self.target_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.OK) self.assertEqual(target_response.status_code, HTTPStatus.OK) - source_response = retry_request(check_index, args=(source_endpoint, index, auth), - expectedStatusCode=HTTPStatus.OK) + source_response = retry_request(check_index, args=(self.source_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.OK) self.assertEqual(source_response.status_code, HTTPStatus.OK) - proxy_response = retry_request(delete_index, args=(proxy_endpoint, index, auth), - expectedStatusCode=HTTPStatus.OK) + proxy_response = retry_request(delete_index, args=(self.proxy_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.OK) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - target_response = retry_request(check_index, args=(target_endpoint, index, auth), - expectedStatusCode=HTTPStatus.NOT_FOUND) + target_response = retry_request(check_index, args=(self.target_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = retry_request(check_index, args=(source_endpoint, index, auth), - expectedStatusCode=HTTPStatus.NOT_FOUND) + source_response = retry_request(check_index, args=(self.source_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_002_document(self): - proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') - source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') - target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') - username = os.getenv('username', 'admin') - password = os.getenv('password', 'admin') - auth = (username, password) - index = "my_index" - doc_id = '7' - # Creating an index, then asserting that the index was created on both targets. - proxy_response = create_index(proxy_endpoint, index, auth) + proxy_response = create_index(self.proxy_endpoint, self.index, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - target_response = check_index(target_endpoint, index, auth) + target_response = check_index(self.target_endpoint, self.index, self.auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) - source_response = check_index(source_endpoint, index, auth) + source_response = check_index(self.source_endpoint, self.index, self.auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) # Creating a document, then asserting that the document was created on both targets. - proxy_response = create_document(proxy_endpoint, index, doc_id, auth) + proxy_response = create_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.CREATED) - source_response = check_document(source_endpoint, index, doc_id, auth) + source_response = check_document(self.source_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = check_document(target_endpoint, index, doc_id, auth) + target_response = check_document(self.target_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) - # Comparing the document's content on both targets, asserting that they match. 
- source_content = get_document(source_endpoint, index, doc_id, auth) - target_content = get_document(target_endpoint, index, doc_id, auth) + + + # Comparing the document's content on both endpoints, asserting that they match. + source_document = source_response.json() + source_content = source_document['_source'] + target_document = target_response.json() + target_content = target_document['_source'] self.assertEqual(source_content, target_content) # Deleting the document that was created then asserting that it was deleted on both targets. - proxy_response = delete_document(proxy_endpoint, index, doc_id, auth) - self.assertEqual(source_response.status_code, HTTPStatus.OK) + proxy_response = delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) + self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - target_response = retry_request(check_document, args=(target_endpoint, index, doc_id, auth), - expectedStatusCode=HTTPStatus.NOT_FOUND) + target_response = retry_request(check_document, args=(self.target_endpoint, self.index, self.doc_id, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = retry_request(check_document, args=(source_endpoint, index, doc_id, auth), - expectedStatusCode=HTTPStatus.NOT_FOUND) + source_response = retry_request(check_document, args=(self.source_endpoint, self.index, self.doc_id, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) # Deleting the index that was created then asserting that it was deleted on both targets. - proxy_response = delete_index(proxy_endpoint, index, auth) + proxy_response = delete_index(self.proxy_endpoint, self.index, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - target_response = retry_request(check_index, args=(target_endpoint, index, auth), - expectedStatusCode=HTTPStatus.NOT_FOUND) + + + target_response = retry_request(check_index, args=(self.target_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = retry_request(check_index, args=(source_endpoint, index, auth), - expectedStatusCode=HTTPStatus.NOT_FOUND) + source_response = retry_request(check_index, args=(self.source_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_003_jupyterAwake(self): From 234bcd65194009416aff773fd21d4611de543f73 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 15 Jun 2023 11:40:45 -0500 Subject: [PATCH 31/57] linting fixes Signed-off-by: Omar Khasawneh --- test/tests.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/test/tests.py b/test/tests.py index 6fbe7c3bf..6ed680c79 100644 --- a/test/tests.py +++ b/test/tests.py @@ -10,7 +10,9 @@ logger = logging.getLogger(__name__) -def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, delay: float = 0.5, expected_status_code: HTTPStatus = None): + +def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, delay: float = 0.5, + expected_status_code: HTTPStatus = None): for attempt in range(1, max_attempts + 1): result = request(*args) @@ -24,7 +26,7 @@ def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, d logger.error(f"All {max_attempts} attempts failed.") logger.error(f"Couldn't 
get the expected status code: {expected_status_code} while making the request:" - f"{request.__name__} using the following arguments: {args} ") + f"{request.__name__} using the following arguments: {args} ") class E2ETests(unittest.TestCase): @@ -38,7 +40,6 @@ def common_functionality(self): self.index = "my_index" self.doc_id = '7' - def setUp(self): self.common_functionality() delete_index(self.proxy_endpoint, self.index, self.auth) @@ -92,8 +93,6 @@ def test_002_document(self): target_response = check_document(self.target_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) - - # Comparing the document's content on both endpoints, asserting that they match. source_document = source_response.json() source_content = source_document['_source'] @@ -116,8 +115,6 @@ def test_002_document(self): proxy_response = delete_index(self.proxy_endpoint, self.index, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - - target_response = retry_request(check_index, args=(self.target_endpoint, self.index, self.auth), expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) From 0f6b8e650e354dbdea984b49ef52553dcba43b9b Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 15 Jun 2023 13:30:47 -0500 Subject: [PATCH 32/57] Add comments about each test + add documentation about test script in general in repo's readme Signed-off-by: Omar Khasawneh --- README.md | 33 +++++++++++++++++++++++++++++---- test/tests.py | 12 +++++++++++- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0335a5027..98dcb8e39 100644 --- a/README.md +++ b/README.md @@ -12,11 +12,23 @@ This repo will contain code and documentation to assist in migrations and upgrad Developers must run the "install_githooks.sh" script in order to add the pre-commit hook. -## End-to-End Testing +## Docker Solution -Developers can run a test script which will verify the end-to-end solution. +The TrafficCapture directory hosts a set of projects designed to facilitate the proxying and capturing of HTTP +traffic, which can then be offloaded and replayed to other HTTP server(s). -To run the test script, users must navigate to the test directory, install the required packages then run the script: +More documentation on this solution can be found here: +[TrafficCapture README](TrafficCapture/README.md) + +### End-to-End Testing + +Developers can run a test script which will verify the end-to-end Docker Solution. +#### Pre-requisites + +* Have all containers from docker solution running. + +To run the test script, users must navigate to the [test directory](test/), +install the required packages then run the script: ``` cd test @@ -24,6 +36,20 @@ pip install -r requirements.txt pytest tests.py ``` +#### Notes +##### Ports Setup +The test script, by default, uses the ports assigned to the containers in this +[docker-compose file](TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml), so if the docker solution in +it's current setup started with no issues, then the test script will run as is. If for any reason +the user changed the ports in that file, they must also either, change the following environment variables: +`PROXY_ENDPOINT`, `SOURCE_ENDPOINT`, `TARGET_ENDPOINT` and `JUPYTER_NOTEBOOK` respectively, or update the default value +for them in [tests.py](test/tests.py). Those are the only endpoints touched by this script. 
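For example, overriding all four endpoints before invoking the tests might look like the following sketch (the variable names are the ones `tests.py` reads via `os.getenv()`; the port values are made up to illustrate a non-default setup):

```
# Endpoint overrides read by tests.py via os.getenv(); the values below are illustrative only
export PROXY_ENDPOINT="https://localhost:9201"
export SOURCE_ENDPOINT="http://localhost:19201"
export TARGET_ENDPOINT="https://localhost:29201"
export JUPYTER_NOTEBOOK="http://localhost:8889/api"
cd test && pytest tests.py
```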
+#### Clean Up +The test script is implemented with a setup and teardown functions that are ran after +each and every test where additions made to the endpoints are deleted, *mostly* cleaning up after themselves, however, +as we log all operations going through the proxy (which is capturing the traffic), those are only being +deleted after the docker solution is shut down. + ## Security See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. @@ -31,4 +57,3 @@ See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more inform ## License This project is licensed under the Apache-2.0 License. - diff --git a/test/tests.py b/test/tests.py index 6ed680c79..1ee12eb57 100644 --- a/test/tests.py +++ b/test/tests.py @@ -31,7 +31,7 @@ def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, d class E2ETests(unittest.TestCase): def common_functionality(self): - self.proxy_endpoint = os.getenv('SOURCE_ENDPOINT', 'https://localhost:9200') + self.proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') self.source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') self.target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') self.username = os.getenv('username', 'admin') @@ -51,7 +51,12 @@ def tearDown(self): delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) def test_001_index(self): + # This test will verify that an index will be created (then deleted) on the target cluster when one is created + # on the source cluster by going through the proxy first. It will verify that the traffic is captured by the + # proxy and that the traffic reaches the source cluster, replays said traffic to the target cluster by the + # replayer. + # Creating an index, then asserting that the index was created on both targets. proxy_response = create_index(self.proxy_endpoint, self.index, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) @@ -74,6 +79,11 @@ def test_001_index(self): self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) def test_002_document(self): + # This test will verify that a document will be created (then deleted) on the target cluster when one is created + # on the source cluster by going through the proxy first. It will verify that the traffic is captured by the + # proxy and that the traffic reaches the source cluster, replays said traffic to the target cluster by the + # replayer. + # Creating an index, then asserting that the index was created on both targets. 
proxy_response = create_index(self.proxy_endpoint, self.index, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) From da2bb1d3c16f3474c1fbbd879824342d3b71ed0d Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 15 Jun 2023 14:51:10 -0500 Subject: [PATCH 33/57] Add documentation for retry request function + code refactoring Signed-off-by: Omar Khasawneh --- test/operations.py | 2 +- test/tests.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/test/operations.py b/test/operations.py index 019ac0446..541073969 100644 --- a/test/operations.py +++ b/test/operations.py @@ -40,7 +40,7 @@ def create_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[ return response -def check_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): +def get_document(endpoint: str, index_name: str, doc_id: str, auth: Optional[Tuple[str, str]] = None): url = f'{endpoint}/{index_name}/_doc/{doc_id}' headers = {'Content-Type': 'application/json'} diff --git a/test/tests.py b/test/tests.py index 1ee12eb57..e5586671d 100644 --- a/test/tests.py +++ b/test/tests.py @@ -1,5 +1,5 @@ from operations import create_index, check_index, create_document,\ - delete_document, delete_index, check_document + delete_document, delete_index, get_document from http import HTTPStatus from typing import Tuple, Callable import unittest @@ -11,6 +11,10 @@ logger = logging.getLogger(__name__) +# The following "retry_request" function's purpose is to retry a certain request for "max_attempts" +# times every "delay" seconds IF the requests returned a status code other than what's expected. +# So this "retry_request" function's arguments are a request function's name and whatever arguments that function +# expects, and the status code the request function is expecting to get. 
def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, delay: float = 0.5, expected_status_code: HTTPStatus = None): for attempt in range(1, max_attempts + 1): @@ -30,7 +34,7 @@ def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, d class E2ETests(unittest.TestCase): - def common_functionality(self): + def set_common_values(self): self.proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') self.source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') self.target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') @@ -41,12 +45,11 @@ def common_functionality(self): self.doc_id = '7' def setUp(self): - self.common_functionality() + self.set_common_values() delete_index(self.proxy_endpoint, self.index, self.auth) delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) def tearDown(self): - self.common_functionality() delete_index(self.proxy_endpoint, self.index, self.auth) delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) @@ -97,10 +100,10 @@ def test_002_document(self): proxy_response = create_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.CREATED) - source_response = check_document(self.source_endpoint, self.index, self.doc_id, self.auth) + source_response = get_document(self.source_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(source_response.status_code, HTTPStatus.OK) - target_response = check_document(self.target_endpoint, self.index, self.doc_id, self.auth) + target_response = get_document(self.target_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(target_response.status_code, HTTPStatus.OK) # Comparing the document's content on both endpoints, asserting that they match. @@ -114,10 +117,10 @@ def test_002_document(self): proxy_response = delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - target_response = retry_request(check_document, args=(self.target_endpoint, self.index, self.doc_id, self.auth), + target_response = retry_request(get_document, args=(self.target_endpoint, self.index, self.doc_id, self.auth), expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(target_response.status_code, HTTPStatus.NOT_FOUND) - source_response = retry_request(check_document, args=(self.source_endpoint, self.index, self.doc_id, self.auth), + source_response = retry_request(get_document, args=(self.source_endpoint, self.index, self.doc_id, self.auth), expected_status_code=HTTPStatus.NOT_FOUND) self.assertEqual(source_response.status_code, HTTPStatus.NOT_FOUND) From 2ab8162edff7be90a93d977a2469e2c2ee3514f0 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 15 Jun 2023 22:44:17 -0500 Subject: [PATCH 34/57] re-add new and improved exception handling Signed-off-by: Omar Khasawneh --- README.md | 14 ++++++++++---- test/tests.py | 53 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 98dcb8e39..8d9125fed 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ More documentation on this solution can be found here: Developers can run a test script which will verify the end-to-end Docker Solution. #### Pre-requisites -* Have all containers from docker solution running. +* Have all containers from Docker solution running. 
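One way to satisfy this pre-requisite is to bring the containers up from the compose file referenced below — a rough sketch, assuming the images that file references have already been built and that a recent Docker Compose is installed:

```
# Assumes the images referenced by this compose file have already been built
cd TrafficCapture/dockerSolution/src/main/docker
docker compose up -d
```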
To run the test script, users must navigate to the [test directory](test/), install the required packages then run the script: @@ -39,16 +39,22 @@ pytest tests.py #### Notes ##### Ports Setup The test script, by default, uses the ports assigned to the containers in this -[docker-compose file](TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml), so if the docker solution in +[docker-compose file](TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml), so if the Docker solution in it's current setup started with no issues, then the test script will run as is. If for any reason the user changed the ports in that file, they must also either, change the following environment variables: `PROXY_ENDPOINT`, `SOURCE_ENDPOINT`, `TARGET_ENDPOINT` and `JUPYTER_NOTEBOOK` respectively, or update the default value -for them in [tests.py](test/tests.py). Those are the only endpoints touched by this script. +(which can be found below) for them in [tests.py](test/tests.py). + +The following are the default values for the only endpoints touched by this script: +* `PROXY_ENDPOINT = https://localhost:9200` +* `SOURCE_ENDPOINT = http://localhost:19200` +* `TARGET_ENDPOINT = https://localhost:29200` +* `JUPYTER_NOTEBOOK = http://localhost:8888/api` #### Clean Up The test script is implemented with a setup and teardown functions that are ran after each and every test where additions made to the endpoints are deleted, *mostly* cleaning up after themselves, however, as we log all operations going through the proxy (which is capturing the traffic), those are only being -deleted after the docker solution is shut down. +deleted after the Docker solution is shut down. ## Security diff --git a/test/tests.py b/test/tests.py index e5586671d..376f66bab 100644 --- a/test/tests.py +++ b/test/tests.py @@ -7,6 +7,7 @@ import logging import time import requests +from requests.exceptions import ConnectionError, SSLError logger = logging.getLogger(__name__) @@ -18,19 +19,31 @@ def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, delay: float = 0.5, expected_status_code: HTTPStatus = None): for attempt in range(1, max_attempts + 1): - - result = request(*args) - if result.status_code == expected_status_code: - return result - else: - logger.warning(f"Status code returned: {result.status_code} did not" - f" match the expected status code: {expected_status_code}." - f" Trying again in {delay} seconds.") + try: + result = request(*args) + if result.status_code == expected_status_code: + return result + else: + logger.warning(f"Status code returned: {result.status_code} did not" + f" match the expected status code: {expected_status_code}." + f" Trying again in {delay} seconds.") + time.sleep(delay) + except ConnectionError as e: + logger.error(f"Received exception: {e}. Unable to connect to server. Please check all containers are up" + f" and ports are setup properly") + logger.warning(f"Trying again in {delay} seconds.") time.sleep(delay) - + continue + except SSLError as e: + logger.error(f"Received exception: {e}. Unable to connect to server. 
Please check all containers are up" + f"and ports are setup properly") + logger.warning(f"Trying again in {delay} seconds.") + time.sleep(delay) + continue logger.error(f"All {max_attempts} attempts failed.") logger.error(f"Couldn't get the expected status code: {expected_status_code} while making the request:" f"{request.__name__} using the following arguments: {args} ") + return None class E2ETests(unittest.TestCase): @@ -38,6 +51,7 @@ def set_common_values(self): self.proxy_endpoint = os.getenv('PROXY_ENDPOINT', 'https://localhost:9200') self.source_endpoint = os.getenv('SOURCE_ENDPOINT', 'http://localhost:19200') self.target_endpoint = os.getenv('TARGET_ENDPOINT', 'https://localhost:29200') + self.jupyter_endpoint = os.getenv('JUPYTER_NOTEBOOK', 'http://localhost:8888/api') self.username = os.getenv('username', 'admin') self.password = os.getenv('password', 'admin') self.auth = (self.username, self.password) @@ -46,8 +60,10 @@ def set_common_values(self): def setUp(self): self.set_common_values() - delete_index(self.proxy_endpoint, self.index, self.auth) - delete_document(self.proxy_endpoint, self.index, self.doc_id, self.auth) + retry_request(delete_index, args=(self.proxy_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) + retry_request(delete_document, args=(self.target_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.NOT_FOUND) def tearDown(self): delete_index(self.proxy_endpoint, self.index, self.auth) @@ -60,7 +76,8 @@ def test_001_index(self): # replayer. # Creating an index, then asserting that the index was created on both targets. - proxy_response = create_index(self.proxy_endpoint, self.index, self.auth) + proxy_response = retry_request(create_index, args=(self.proxy_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.OK) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) target_response = retry_request(check_index, args=(self.target_endpoint, self.index, self.auth), @@ -88,12 +105,15 @@ def test_002_document(self): # replayer. # Creating an index, then asserting that the index was created on both targets. - proxy_response = create_index(self.proxy_endpoint, self.index, self.auth) + proxy_response = retry_request(create_index, args=(self.proxy_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.OK) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) - target_response = check_index(self.target_endpoint, self.index, self.auth) + target_response = retry_request(check_index, args=(self.target_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.OK) self.assertEqual(target_response.status_code, HTTPStatus.OK) - source_response = check_index(self.source_endpoint, self.index, self.auth) + source_response = retry_request(check_index, args=(self.source_endpoint, self.index, self.auth), + expected_status_code=HTTPStatus.OK) self.assertEqual(source_response.status_code, HTTPStatus.OK) # Creating a document, then asserting that the document was created on both targets. @@ -137,6 +157,5 @@ def test_002_document(self): def test_003_jupyterAwake(self): # Making sure that the Jupyter notebook is up and can be reached. 
- jupyter_endpoint = os.getenv('JUPYTER_NOTEBOOK', 'http://localhost:8888/api') - response = requests.get(jupyter_endpoint) + response = requests.get(self.jupyter_endpoint) self.assertEqual(response.status_code, HTTPStatus.OK) From 7571378a76ece516b3c62c65da467c12c29c3246 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 15 Jun 2023 22:44:39 -0500 Subject: [PATCH 35/57] lint fix Signed-off-by: Omar Khasawneh --- test/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tests.py b/test/tests.py index 376f66bab..4f7337551 100644 --- a/test/tests.py +++ b/test/tests.py @@ -77,7 +77,7 @@ def test_001_index(self): # Creating an index, then asserting that the index was created on both targets. proxy_response = retry_request(create_index, args=(self.proxy_endpoint, self.index, self.auth), - expected_status_code=HTTPStatus.OK) + expected_status_code=HTTPStatus.OK) self.assertEqual(proxy_response.status_code, HTTPStatus.OK) target_response = retry_request(check_index, args=(self.target_endpoint, self.index, self.auth), From bec33bced32b81cc8c0d222fcb218cbeb2ef33f5 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 16 Jun 2023 10:14:56 -0500 Subject: [PATCH 36/57] add raising an exception to retry logic Signed-off-by: Omar Khasawneh --- test/tests.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/tests.py b/test/tests.py index 4f7337551..ea2f7ae4a 100644 --- a/test/tests.py +++ b/test/tests.py @@ -40,10 +40,9 @@ def retry_request(request: Callable, args: Tuple = (), max_attempts: int = 10, d logger.warning(f"Trying again in {delay} seconds.") time.sleep(delay) continue - logger.error(f"All {max_attempts} attempts failed.") logger.error(f"Couldn't get the expected status code: {expected_status_code} while making the request:" - f"{request.__name__} using the following arguments: {args} ") - return None + f"{request.__name__} using the following arguments: {args}.") + raise Exception(f"All {max_attempts} retry attempts failed. 
Please check the logs for more information.") class E2ETests(unittest.TestCase): @@ -62,7 +61,7 @@ def setUp(self): self.set_common_values() retry_request(delete_index, args=(self.proxy_endpoint, self.index, self.auth), expected_status_code=HTTPStatus.NOT_FOUND) - retry_request(delete_document, args=(self.target_endpoint, self.index, self.auth), + retry_request(delete_document, args=(self.proxy_endpoint, self.index, self.auth), expected_status_code=HTTPStatus.NOT_FOUND) def tearDown(self): From 3a21ea56a6f4bbe68ed931dfa9a96db639718e73 Mon Sep 17 00:00:00 2001 From: Tanner Lewis Date: Mon, 19 Jun 2023 11:04:44 -0400 Subject: [PATCH 37/57] Add Support for Proxy to connect to HTTPS Servers (#203) * MIGRATIONS-1105: Add tls for backside of proxy Signed-off-by: Tanner Lewis --- .../src/main/docker/docker-compose.yml | 2 +- .../trafficcapture/proxyserver/Main.java | 62 +++++++++++++++---- .../proxyserver/netty/FrontsideHandler.java | 29 ++++++--- .../netty/NettyScanningHttpProxy.java | 9 +-- .../netty/ProxyChannelInitializer.java | 14 +++-- .../netty/NettyScanningHttpProxyTest.java | 8 ++- 6 files changed, 89 insertions(+), 35 deletions(-) diff --git a/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml b/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml index 266442aa9..16121b04e 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml +++ b/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml @@ -7,7 +7,7 @@ services: - migrations ports: - "9200:9200" - command: /runJavaWithClasspath.sh org.opensearch.migrations.trafficcapture.proxyserver.Main --kafkaConnection kafka:9092 --destinationHost elasticsearch --destinationPort 9200 --listenPort 9200 --sslConfigFile /usr/share/elasticsearch/config/proxy_tls.yml + command: /runJavaWithClasspath.sh org.opensearch.migrations.trafficcapture.proxyserver.Main --kafkaConnection kafka:9092 --destinationUri http://elasticsearch:9200 --listenPort 9200 --sslConfigFile /usr/share/elasticsearch/config/proxy_tls.yml depends_on: - kafka - elasticsearch diff --git a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java index 64176d3f4..1be8ef129 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java @@ -4,6 +4,9 @@ import com.beust.jcommander.Parameter; import com.beust.jcommander.ParameterException; import com.google.protobuf.CodedOutputStream; +import io.netty.handler.ssl.SslContext; +import io.netty.handler.ssl.SslContextBuilder; +import io.netty.handler.ssl.util.InsecureTrustManagerFactory; import lombok.NonNull; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -11,7 +14,6 @@ import org.apache.logging.log4j.core.util.NullOutputStream; import org.opensearch.common.settings.Settings; import org.opensearch.migrations.trafficcapture.FileConnectionCaptureFactory; -import org.opensearch.migrations.trafficcapture.IChannelConnectionCaptureSerializer; import org.opensearch.migrations.trafficcapture.IConnectionCaptureFactory; import org.opensearch.migrations.trafficcapture.StreamChannelConnectionCaptureSerializer; import org.opensearch.migrations.trafficcapture.kafkaoffloader.KafkaCaptureFactory; @@ -20,9 +22,10 @@ 
import org.opensearch.security.ssl.util.SSLConfigConstants; import javax.net.ssl.SSLEngine; -import java.io.File; +import javax.net.ssl.SSLException; import java.io.FileReader; import java.io.IOException; +import java.net.URI; import java.nio.file.Files; import java.nio.file.Paths; import java.util.Optional; @@ -73,15 +76,15 @@ static class Parameters { description = "The maximum number of bytes that will be written to a single TrafficStream object.") int maximumTrafficStreamSize = 1024*1024; @Parameter(required = false, - names = {"--destinationHost"}, - arity = 1, - description = "Hostname of the server that the proxy is capturing traffic for.") - String backsideHostname = "localhost"; + names = {"--insecureDestination"}, + arity = 0, + description = "Do not check the destination server's certificate") + boolean allowInsecureConnectionsToBackside; @Parameter(required = true, - names = {"--destinationPort"}, + names = {"--destinationUri"}, arity = 1, - description = "Port of the server that the proxy connects to.") - int backsidePort = 0; + description = "URI of the server that the proxy is capturing traffic for.") + String backsideUriString; @Parameter(required = true, names = {"--listenPort"}, arity = 1, @@ -171,11 +174,48 @@ private static IConnectionCaptureFactory getConnectionCaptureFactory(Parameters } } + // Utility method for converting uri string to an actual URI object. Similar logic is placed in the trafficReplayer + // module: TrafficReplayer.java + private static URI convertStringToUri(String uriString) { + URI serverUri; + try { + serverUri = new URI(uriString); + } catch (Exception e) { + System.err.println("Exception parsing URI string: " + uriString); + System.err.println(e.getMessage()); + System.exit(3); + return null; + } + if (serverUri.getPort() < 0) { + throw new RuntimeException("Port not present for URI: " + serverUri); + } + if (serverUri.getHost() == null) { + throw new RuntimeException("Hostname not present for URI: " + serverUri); + } + if (serverUri.getScheme() == null) { + throw new RuntimeException("Scheme (http|https) is not present for URI: " + serverUri); + } + return serverUri; + } + + private static SslContext loadBacksideSslContext(URI serverUri, boolean allowInsecureConnections) throws + SSLException { + if (serverUri.getScheme().equalsIgnoreCase("https")) { + var sslContextBuilder = SslContextBuilder.forClient(); + if (allowInsecureConnections) { + sslContextBuilder.trustManager(InsecureTrustManagerFactory.INSTANCE); + } + return sslContextBuilder.build(); + } else { + return null; + } + } + public static void main(String[] args) throws InterruptedException, IOException { var params = parseArgs(args); + var backsideUri = convertStringToUri(params.backsideUriString); - // This should be added to the argument parser when added in var sksOp = Optional.ofNullable(params.sslConfigFilePath) .map(sslConfigFile->new DefaultSecurityKeyStore(getSettings(sslConfigFile), Paths.get(sslConfigFile).toAbsolutePath().getParent())); @@ -184,7 +224,7 @@ public static void main(String[] args) throws InterruptedException, IOException var proxy = new NettyScanningHttpProxy(params.frontsidePort); try { - proxy.start(params.backsideHostname, params.backsidePort, + proxy.start(backsideUri, loadBacksideSslContext(backsideUri, params.allowInsecureConnectionsToBackside), sksOp.map(sks-> (Supplier) () -> { try { var sslEngine = sks.createHTTPSSLEngine(); diff --git 
a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/FrontsideHandler.java b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/FrontsideHandler.java index cea9eb66d..7344af3c2 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/FrontsideHandler.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/FrontsideHandler.java @@ -8,26 +8,29 @@ import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; import io.netty.channel.ChannelOption; -import io.netty.handler.logging.LogLevel; -import io.netty.handler.logging.LoggingHandler; +import io.netty.handler.ssl.SslContext; +import io.netty.handler.ssl.SslHandler; import lombok.extern.slf4j.Slf4j; +import javax.net.ssl.SSLEngine; +import java.net.URI; + @Slf4j public class FrontsideHandler extends ChannelInboundHandlerAdapter { private Channel outboundChannel; - private final String host; - private final int port; + private final URI backsideUri; + private final SslContext backsideSslContext; /** * Create a handler that sets the autoreleases flag - * @param host - * @param port + * @param backsideUri + * @param backsideSslContext */ - public FrontsideHandler(String host, int port) { - this.host = host; - this.port = port; + public FrontsideHandler(URI backsideUri, SslContext backsideSslContext) { + this.backsideUri = backsideUri; + this.backsideSslContext = backsideSslContext; } @Override @@ -40,7 +43,7 @@ public void channelActive(ChannelHandlerContext ctx) { .handler(new BacksideHandler(inboundChannel)) .option(ChannelOption.AUTO_READ, false); log.debug("Active - setting up backend connection"); - var f = b.connect(host, port); + var f = b.connect(backsideUri.getHost(), backsideUri.getPort()); outboundChannel = f.channel(); f.addListener(new ChannelFutureListener() { @Override @@ -48,6 +51,12 @@ public void operationComplete(ChannelFuture future) { if (future.isSuccess()) { // connection complete start to read first data log.debug("Done setting up backend channel & it was successful"); + if (backsideSslContext != null) { + var pipeline = future.channel().pipeline(); + SSLEngine sslEngine = backsideSslContext.newEngine(future.channel().alloc()); + sslEngine.setUseClientMode(true); + pipeline.addFirst("ssl", new SslHandler(sslEngine)); + } inboundChannel.read(); } else { // Close the connection if the connection attempt has failed. 
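Taken together with the Main.java changes above, a proxy pointed at an HTTPS backside could be launched roughly as follows — a sketch only, mirroring the docker-compose command earlier in this patch. The host name, port, and config path are illustrative, and `--insecureDestination` is only appropriate when the destination's certificate should not be verified (for example, a self-signed certificate in a test setup):

```
# Illustrative invocation; host names and paths are placeholders
/runJavaWithClasspath.sh org.opensearch.migrations.trafficcapture.proxyserver.Main \
    --kafkaConnection kafka:9092 \
    --destinationUri https://opensearch:9200 \
    --insecureDestination \
    --listenPort 9200 \
    --sslConfigFile /usr/share/elasticsearch/config/proxy_tls.yml
```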
diff --git a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxy.java b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxy.java index d7f3b2c5b..0d456f12e 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxy.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxy.java @@ -6,11 +6,13 @@ import io.netty.channel.EventLoopGroup; import io.netty.channel.nio.NioEventLoopGroup; import io.netty.channel.socket.nio.NioServerSocketChannel; +import io.netty.handler.ssl.SslContext; import io.netty.util.internal.logging.InternalLoggerFactory; import io.netty.util.internal.logging.JdkLoggerFactory; import org.opensearch.migrations.trafficcapture.IConnectionCaptureFactory; import javax.net.ssl.SSLEngine; +import java.net.URI; import java.util.function.Supplier; public class NettyScanningHttpProxy { @@ -27,9 +29,8 @@ public int getProxyPort() { return proxyPort; } - public void start(String backsideHost, int backsidePort, Supplier sslEngineSupplier, - IConnectionCaptureFactory connectionCaptureFactory) - throws InterruptedException { + public void start(URI backsideUri, SslContext backsideSslContext, Supplier sslEngineSupplier, + IConnectionCaptureFactory connectionCaptureFactory) throws InterruptedException { InternalLoggerFactory.setDefaultFactory(JdkLoggerFactory.INSTANCE); bossGroup = new NioEventLoopGroup(1); workerGroup = new NioEventLoopGroup(); @@ -39,7 +40,7 @@ public void start(String backsideHost, int backsidePort, Supplier ssl .channel(NioServerSocketChannel.class) //.handler(new LoggingHandler(LogLevel.INFO)) //.childHandler(new HexDumpProxyInitializer(backsideHost, backsidePort)) - .childHandler(new ProxyChannelInitializer(backsideHost, backsidePort, sslEngineSupplier, + .childHandler(new ProxyChannelInitializer(backsideUri, backsideSslContext, sslEngineSupplier, connectionCaptureFactory)) .childOption(ChannelOption.AUTO_READ, false) .bind(proxyPort).sync().channel(); diff --git a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/ProxyChannelInitializer.java b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/ProxyChannelInitializer.java index 29ff8d919..3d60428c1 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/ProxyChannelInitializer.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/ProxyChannelInitializer.java @@ -8,6 +8,7 @@ import io.netty.handler.codec.http.HttpRequest; import io.netty.handler.logging.LogLevel; import io.netty.handler.logging.LoggingHandler; +import io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslHandler; import org.opensearch.migrations.trafficcapture.IConnectionCaptureFactory; import org.opensearch.migrations.trafficcapture.netty.ConditionallyReliableLoggingHttpRequestHandler; @@ -16,19 +17,20 @@ import javax.net.ssl.SSLEngine; import java.io.IOException; +import java.net.URI; import java.util.function.Supplier; public class ProxyChannelInitializer extends ChannelInitializer { private final IConnectionCaptureFactory 
connectionCaptureFactory; private final Supplier sslEngineProvider; - private final String host; - private final int port; + private final URI backsideUri; + private final SslContext backsideSslContext; - public ProxyChannelInitializer(String host, int port, Supplier sslEngineSupplier, + public ProxyChannelInitializer(URI backsideUri, SslContext backsideSslContext, Supplier sslEngineSupplier, IConnectionCaptureFactory connectionCaptureFactory) { - this.host = host; - this.port = port; + this.backsideUri = backsideUri; + this.backsideSslContext = backsideSslContext; this.sslEngineProvider = sslEngineSupplier; this.connectionCaptureFactory = connectionCaptureFactory; } @@ -53,6 +55,6 @@ protected void initChannel(SocketChannel ch) throws IOException { ch.pipeline().addLast(new ConditionallyReliableLoggingHttpRequestHandler(offloader, this::shouldGuaranteeMessageOffloading)); //ch.pipeline().addLast(new LoggingHandler("POST", LogLevel.ERROR)); - ch.pipeline().addLast(new FrontsideHandler(host, port)); + ch.pipeline().addLast(new FrontsideHandler(backsideUri, backsideSslContext)); } } diff --git a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxyTest.java b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxyTest.java index ad2e9620e..478eb9b77 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxyTest.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/netty/NettyScanningHttpProxyTest.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.net.URI; +import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -209,10 +210,11 @@ private static String makeTestRequestViaClient(CloseableHttpClient client, URI n retryWithNewPortUntilNoThrow(port -> { nshp.set(new NettyScanningHttpProxy(port.intValue())); try { - nshp.get().start(LOCALHOST, upstreamTestServer.get().getAddress().getPort(), null, - connectionCaptureFactory); + URI testServerUri = new URI("http", null, LOCALHOST, upstreamTestServer.get().getAddress().getPort(), + null, null, null); + nshp.get().start(testServerUri,null, null, connectionCaptureFactory); System.out.println("proxy port = "+port.intValue()); - } catch (InterruptedException e) { + } catch (InterruptedException | URISyntaxException e) { throw new RuntimeException(e); } }); From ef12644695828f23709c63980e9ca81ba8351b3e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Jun 2023 09:52:47 -0400 Subject: [PATCH 38/57] Bump aws-cdk-lib in /deployment/cdk/opensearch-service-migration (#206) Bumps [aws-cdk-lib](https://github.com/aws/aws-cdk/tree/HEAD/packages/aws-cdk-lib) from 2.62.2 to 2.80.0. - [Release notes](https://github.com/aws/aws-cdk/releases) - [Changelog](https://github.com/aws/aws-cdk/blob/main/CHANGELOG.v2.md) - [Commits](https://github.com/aws/aws-cdk/commits/v2.80.0/packages/aws-cdk-lib) --- updated-dependencies: - dependency-name: aws-cdk-lib dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../package-lock.json | 344 +++++++++++++++--- .../opensearch-service-migration/package.json | 2 +- 2 files changed, 299 insertions(+), 47 deletions(-) diff --git a/deployment/cdk/opensearch-service-migration/package-lock.json b/deployment/cdk/opensearch-service-migration/package-lock.json index ae960d791..40f8f64ef 100644 --- a/deployment/cdk/opensearch-service-migration/package-lock.json +++ b/deployment/cdk/opensearch-service-migration/package-lock.json @@ -8,7 +8,7 @@ "name": "opensearch-service-domain-cdk", "version": "0.1.0", "dependencies": { - "aws-cdk-lib": "2.62.2", + "aws-cdk-lib": "2.80.0", "constructs": "^10.0.0", "source-map-support": "^0.5.21" }, @@ -39,9 +39,9 @@ } }, "node_modules/@aws-cdk/asset-awscli-v1": { - "version": "2.2.52", - "resolved": "https://registry.npmjs.org/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.52.tgz", - "integrity": "sha512-9dBPrvByWrUOcs5Rjwv08FWSummo1Uk/EgE3dCFDqvIqlSTudEmu6TGU3zrs00rfcAjqDv6gBuSttzG5f9tfdQ==" + "version": "2.2.196", + "resolved": "https://registry.npmjs.org/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.196.tgz", + "integrity": "sha512-F8hU1rEzYS7z5Dt2s+ttd0/jMvPuUE9BcXexgq+dIOLuZsRpDwNnkMBtjNaJXJS48ZGJ2X4b8VlklseepdtoSA==" }, "node_modules/@aws-cdk/asset-kubectl-v20": { "version": "2.1.1", @@ -49,9 +49,9 @@ "integrity": "sha512-U1ntiX8XiMRRRH5J1IdC+1t5CE89015cwyt5U63Cpk0GnMlN5+h9WsWMlKlPXZR4rdq/m806JRlBMRpBUB2Dhw==" }, "node_modules/@aws-cdk/asset-node-proxy-agent-v5": { - "version": "2.0.42", - "resolved": "https://registry.npmjs.org/@aws-cdk/asset-node-proxy-agent-v5/-/asset-node-proxy-agent-v5-2.0.42.tgz", - "integrity": "sha512-PxvP1UU2xa4k3Ea78DxAYY8ADvwWZ/nPu+xsjQLsT+MP+aB3RZ3pGc/fNlH7Rg56Zyb/j3GSdihAy4Oi5xa+TQ==" + "version": "2.0.165", + "resolved": "https://registry.npmjs.org/@aws-cdk/asset-node-proxy-agent-v5/-/asset-node-proxy-agent-v5-2.0.165.tgz", + "integrity": "sha512-bsyLQD/vqXQcc9RDmlM1XqiFNO/yewgVFXmkMcQkndJbmE/jgYkzewwYGrBlfL725hGLQipXq19+jwWwdsXQqg==" }, "node_modules/@babel/code-frame": { "version": "7.18.6", @@ -1257,9 +1257,9 @@ } }, "node_modules/aws-cdk-lib": { - "version": "2.62.2", - "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.62.2.tgz", - "integrity": "sha512-ynyoEFQckICFJzbUd89pWjol3GGbxRF05E8BCPEyy++vLHJZdqaJxRL4REl4lrdznnkb1kvxtBSGg4cOkR4o3w==", + "version": "2.80.0", + "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.80.0.tgz", + "integrity": "sha512-PoqD3Yms5I0ajuTi071nTW/hpkH3XsdyZzn5gYsPv0qD7mqP3h6Qr+6RiGx+yQ1KcVFyxWdX15uK+DsC0KwvcQ==", "bundleDependencies": [ "@balena/dockerignore", "case", @@ -1269,20 +1269,22 @@ "minimatch", "punycode", "semver", + "table", "yaml" ], "dependencies": { - "@aws-cdk/asset-awscli-v1": "^2.2.49", + "@aws-cdk/asset-awscli-v1": "^2.2.177", "@aws-cdk/asset-kubectl-v20": "^2.1.1", - "@aws-cdk/asset-node-proxy-agent-v5": "^2.0.38", + "@aws-cdk/asset-node-proxy-agent-v5": "^2.0.148", "@balena/dockerignore": "^1.0.2", "case": "1.6.3", - "fs-extra": "^9.1.0", + "fs-extra": "^11.1.1", "ignore": "^5.2.4", "jsonschema": "^1.4.1", "minimatch": "^3.1.2", - "punycode": "^2.2.0", - "semver": "^7.3.8", + "punycode": "^2.3.0", + "semver": "^7.5.1", + "table": "^6.8.1", "yaml": "1.10.2" }, "engines": { @@ -1297,12 +1299,49 @@ "inBundle": true, "license": "Apache-2.0" }, - "node_modules/aws-cdk-lib/node_modules/at-least-node": { - "version": "1.0.0", + "node_modules/aws-cdk-lib/node_modules/ajv": { + "version": 
"8.12.0", "inBundle": true, - "license": "ISC", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/aws-cdk-lib/node_modules/ansi-regex": { + "version": "5.0.1", + "inBundle": true, + "license": "MIT", "engines": { - "node": ">= 4.0.0" + "node": ">=8" + } + }, + "node_modules/aws-cdk-lib/node_modules/ansi-styles": { + "version": "4.3.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/aws-cdk-lib/node_modules/astral-regex": { + "version": "2.0.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=8" } }, "node_modules/aws-cdk-lib/node_modules/balanced-match": { @@ -1327,27 +1366,52 @@ "node": ">= 0.8.0" } }, + "node_modules/aws-cdk-lib/node_modules/color-convert": { + "version": "2.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/aws-cdk-lib/node_modules/color-name": { + "version": "1.1.4", + "inBundle": true, + "license": "MIT" + }, "node_modules/aws-cdk-lib/node_modules/concat-map": { "version": "0.0.1", "inBundle": true, "license": "MIT" }, + "node_modules/aws-cdk-lib/node_modules/emoji-regex": { + "version": "8.0.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/aws-cdk-lib/node_modules/fast-deep-equal": { + "version": "3.1.3", + "inBundle": true, + "license": "MIT" + }, "node_modules/aws-cdk-lib/node_modules/fs-extra": { - "version": "9.1.0", + "version": "11.1.1", "inBundle": true, "license": "MIT", "dependencies": { - "at-least-node": "^1.0.0", "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", "universalify": "^2.0.0" }, "engines": { - "node": ">=10" + "node": ">=14.14" } }, "node_modules/aws-cdk-lib/node_modules/graceful-fs": { - "version": "4.2.10", + "version": "4.2.11", "inBundle": true, "license": "ISC" }, @@ -1359,6 +1423,19 @@ "node": ">= 4" } }, + "node_modules/aws-cdk-lib/node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/aws-cdk-lib/node_modules/json-schema-traverse": { + "version": "1.0.0", + "inBundle": true, + "license": "MIT" + }, "node_modules/aws-cdk-lib/node_modules/jsonfile": { "version": "6.1.0", "inBundle": true, @@ -1378,6 +1455,11 @@ "node": "*" } }, + "node_modules/aws-cdk-lib/node_modules/lodash.truncate": { + "version": "4.4.2", + "inBundle": true, + "license": "MIT" + }, "node_modules/aws-cdk-lib/node_modules/lru-cache": { "version": "6.0.0", "inBundle": true, @@ -1401,15 +1483,23 @@ } }, "node_modules/aws-cdk-lib/node_modules/punycode": { - "version": "2.2.0", + "version": "2.3.0", "inBundle": true, "license": "MIT", "engines": { "node": ">=6" } }, + "node_modules/aws-cdk-lib/node_modules/require-from-string": { + "version": "2.0.2", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/aws-cdk-lib/node_modules/semver": { - "version": "7.3.8", + "version": "7.5.1", "inBundle": true, "license": "ISC", "dependencies": { @@ -1422,6 +1512,61 @@ "node": ">=10" } }, + "node_modules/aws-cdk-lib/node_modules/slice-ansi": { + "version": "4.0.0", + "inBundle": true, + "license": 
"MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/slice-ansi?sponsor=1" + } + }, + "node_modules/aws-cdk-lib/node_modules/string-width": { + "version": "4.2.3", + "inBundle": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/aws-cdk-lib/node_modules/strip-ansi": { + "version": "6.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/aws-cdk-lib/node_modules/table": { + "version": "6.8.1", + "inBundle": true, + "license": "BSD-3-Clause", + "dependencies": { + "ajv": "^8.0.1", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/aws-cdk-lib/node_modules/universalify": { "version": "2.0.0", "inBundle": true, @@ -1430,6 +1575,14 @@ "node": ">= 10.0.0" } }, + "node_modules/aws-cdk-lib/node_modules/uri-js": { + "version": "4.4.1", + "inBundle": true, + "license": "BSD-2-Clause", + "dependencies": { + "punycode": "^2.1.0" + } + }, "node_modules/aws-cdk-lib/node_modules/yallist": { "version": "4.0.0", "inBundle": true, @@ -4020,9 +4173,9 @@ } }, "@aws-cdk/asset-awscli-v1": { - "version": "2.2.52", - "resolved": "https://registry.npmjs.org/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.52.tgz", - "integrity": "sha512-9dBPrvByWrUOcs5Rjwv08FWSummo1Uk/EgE3dCFDqvIqlSTudEmu6TGU3zrs00rfcAjqDv6gBuSttzG5f9tfdQ==" + "version": "2.2.196", + "resolved": "https://registry.npmjs.org/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.196.tgz", + "integrity": "sha512-F8hU1rEzYS7z5Dt2s+ttd0/jMvPuUE9BcXexgq+dIOLuZsRpDwNnkMBtjNaJXJS48ZGJ2X4b8VlklseepdtoSA==" }, "@aws-cdk/asset-kubectl-v20": { "version": "2.1.1", @@ -4030,9 +4183,9 @@ "integrity": "sha512-U1ntiX8XiMRRRH5J1IdC+1t5CE89015cwyt5U63Cpk0GnMlN5+h9WsWMlKlPXZR4rdq/m806JRlBMRpBUB2Dhw==" }, "@aws-cdk/asset-node-proxy-agent-v5": { - "version": "2.0.42", - "resolved": "https://registry.npmjs.org/@aws-cdk/asset-node-proxy-agent-v5/-/asset-node-proxy-agent-v5-2.0.42.tgz", - "integrity": "sha512-PxvP1UU2xa4k3Ea78DxAYY8ADvwWZ/nPu+xsjQLsT+MP+aB3RZ3pGc/fNlH7Rg56Zyb/j3GSdihAy4Oi5xa+TQ==" + "version": "2.0.165", + "resolved": "https://registry.npmjs.org/@aws-cdk/asset-node-proxy-agent-v5/-/asset-node-proxy-agent-v5-2.0.165.tgz", + "integrity": "sha512-bsyLQD/vqXQcc9RDmlM1XqiFNO/yewgVFXmkMcQkndJbmE/jgYkzewwYGrBlfL725hGLQipXq19+jwWwdsXQqg==" }, "@babel/code-frame": { "version": "7.18.6", @@ -4995,21 +5148,22 @@ } }, "aws-cdk-lib": { - "version": "2.62.2", - "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.62.2.tgz", - "integrity": "sha512-ynyoEFQckICFJzbUd89pWjol3GGbxRF05E8BCPEyy++vLHJZdqaJxRL4REl4lrdznnkb1kvxtBSGg4cOkR4o3w==", + "version": "2.80.0", + "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.80.0.tgz", + "integrity": "sha512-PoqD3Yms5I0ajuTi071nTW/hpkH3XsdyZzn5gYsPv0qD7mqP3h6Qr+6RiGx+yQ1KcVFyxWdX15uK+DsC0KwvcQ==", "requires": { - "@aws-cdk/asset-awscli-v1": "^2.2.49", + "@aws-cdk/asset-awscli-v1": "^2.2.177", "@aws-cdk/asset-kubectl-v20": "^2.1.1", - "@aws-cdk/asset-node-proxy-agent-v5": "^2.0.38", + "@aws-cdk/asset-node-proxy-agent-v5": "^2.0.148", "@balena/dockerignore": "^1.0.2", "case": "1.6.3", - 
"fs-extra": "^9.1.0", + "fs-extra": "^11.1.1", "ignore": "^5.2.4", "jsonschema": "^1.4.1", "minimatch": "^3.1.2", - "punycode": "^2.2.0", - "semver": "^7.3.8", + "punycode": "^2.3.0", + "semver": "^7.5.1", + "table": "^6.8.1", "yaml": "1.10.2" }, "dependencies": { @@ -5017,8 +5171,29 @@ "version": "1.0.2", "bundled": true }, - "at-least-node": { - "version": "1.0.0", + "ajv": { + "version": "8.12.0", + "bundled": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + } + }, + "ansi-regex": { + "version": "5.0.1", + "bundled": true + }, + "ansi-styles": { + "version": "4.3.0", + "bundled": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "astral-regex": { + "version": "2.0.0", "bundled": true }, "balanced-match": { @@ -5037,28 +5212,54 @@ "version": "1.6.3", "bundled": true }, + "color-convert": { + "version": "2.0.1", + "bundled": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "bundled": true + }, "concat-map": { "version": "0.0.1", "bundled": true }, + "emoji-regex": { + "version": "8.0.0", + "bundled": true + }, + "fast-deep-equal": { + "version": "3.1.3", + "bundled": true + }, "fs-extra": { - "version": "9.1.0", + "version": "11.1.1", "bundled": true, "requires": { - "at-least-node": "^1.0.0", "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", "universalify": "^2.0.0" } }, "graceful-fs": { - "version": "4.2.10", + "version": "4.2.11", "bundled": true }, "ignore": { "version": "5.2.4", "bundled": true }, + "is-fullwidth-code-point": { + "version": "3.0.0", + "bundled": true + }, + "json-schema-traverse": { + "version": "1.0.0", + "bundled": true + }, "jsonfile": { "version": "6.1.0", "bundled": true, @@ -5071,6 +5272,10 @@ "version": "1.4.1", "bundled": true }, + "lodash.truncate": { + "version": "4.4.2", + "bundled": true + }, "lru-cache": { "version": "6.0.0", "bundled": true, @@ -5086,20 +5291,67 @@ } }, "punycode": { - "version": "2.2.0", + "version": "2.3.0", + "bundled": true + }, + "require-from-string": { + "version": "2.0.2", "bundled": true }, "semver": { - "version": "7.3.8", + "version": "7.5.1", "bundled": true, "requires": { "lru-cache": "^6.0.0" } }, + "slice-ansi": { + "version": "4.0.0", + "bundled": true, + "requires": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + } + }, + "string-width": { + "version": "4.2.3", + "bundled": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + } + }, + "strip-ansi": { + "version": "6.0.1", + "bundled": true, + "requires": { + "ansi-regex": "^5.0.1" + } + }, + "table": { + "version": "6.8.1", + "bundled": true, + "requires": { + "ajv": "^8.0.1", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1" + } + }, "universalify": { "version": "2.0.0", "bundled": true }, + "uri-js": { + "version": "4.4.1", + "bundled": true, + "requires": { + "punycode": "^2.1.0" + } + }, "yallist": { "version": "4.0.0", "bundled": true diff --git a/deployment/cdk/opensearch-service-migration/package.json b/deployment/cdk/opensearch-service-migration/package.json index 021bde4d9..77e2e83b0 100644 --- a/deployment/cdk/opensearch-service-migration/package.json +++ b/deployment/cdk/opensearch-service-migration/package.json @@ -20,7 +20,7 @@ "typescript": "~4.9.4" }, "dependencies": { - "aws-cdk-lib": "2.62.2", + "aws-cdk-lib": "2.80.0", "constructs": 
"^10.0.0", "source-map-support": "^0.5.21" } From 2fbf826b408473d20dee4264c419220fa5b1445f Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 23 Jun 2023 10:32:02 -0500 Subject: [PATCH 39/57] Add github workflow Signed-off-by: Omar Khasawneh --- .github/workflows/gradle.yml | 32 ++++++++++++++++++++++++++++++++ TrafficCapture/build.gradle | 12 ++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 .github/workflows/gradle.yml diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml new file mode 100644 index 000000000..b11202675 --- /dev/null +++ b/.github/workflows/gradle.yml @@ -0,0 +1,32 @@ +name: Gradle Build and Test + +on: [push, pull_request] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v2 + with: + java-version: '11' + distribution: 'adopt' + + - name: Run Gradle Build + run: ./gradlew assemble + working-directory: TrafficCapture + + - name: Run Tests with Coverage + run: ./gradlew test jacocoTestReport + working-directory: TrafficCapture + + - name: Upload to Codecov + uses: codecov/codecov-action@v1 + with: + file: ./TrafficCapture/*/build/reports/jacoco/test/jacocoTestReport.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: true diff --git a/TrafficCapture/build.gradle b/TrafficCapture/build.gradle index 7c5804c24..a349b1cc5 100644 --- a/TrafficCapture/build.gradle +++ b/TrafficCapture/build.gradle @@ -1,5 +1,6 @@ allprojects { apply plugin: 'java' + apply plugin: 'jacoco' java { toolchain { @@ -9,4 +10,15 @@ allprojects { test { jvmArgs '-ea' } + + jacocoTestReport { + reports { + xml.required = true + xml.destination file("${buildDir}/reports/jacoco/test/jacocoTestReport.xml") + } + } +} + +test { + finalizedBy 'jacocoTestReport' } From c64a02ec87aecd561fe26929e265fa3a04678235 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 23 Jun 2023 10:39:10 -0500 Subject: [PATCH 40/57] Update codecov version Signed-off-by: Omar Khasawneh --- .github/workflows/gradle.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index b11202675..80fb63491 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -24,9 +24,8 @@ jobs: working-directory: TrafficCapture - name: Upload to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: file: ./TrafficCapture/*/build/reports/jacoco/test/jacocoTestReport.xml flags: unittests - name: codecov-umbrella fail_ci_if_error: true From 9e9fa8faffeeaa88d8d3264398d167f82f179a9a Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 23 Jun 2023 10:40:19 -0500 Subject: [PATCH 41/57] run gradle build instead of assemble Signed-off-by: Omar Khasawneh --- .github/workflows/gradle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 80fb63491..f62736055 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -16,7 +16,7 @@ jobs: distribution: 'adopt' - name: Run Gradle Build - run: ./gradlew assemble + run: ./gradlew build working-directory: TrafficCapture - name: Run Tests with Coverage From 05ffab5d9dd2797c204f5662ce6b424021ed5c52 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 23 Jun 2023 11:17:42 -0500 Subject: [PATCH 42/57] Wildcard isn't supported - add each cov file manually Signed-off-by: Omar Khasawneh --- .github/workflows/gradle.yml | 9 +++++++-- 1 file changed, 7 
insertions(+), 2 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index f62736055..5fe7bdea4 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -26,6 +26,11 @@ jobs: - name: Upload to Codecov uses: codecov/codecov-action@v3 with: - file: ./TrafficCapture/*/build/reports/jacoco/test/jacocoTestReport.xml + files: TrafficCapture/captureKafkaOffloader/build/reports/jacoco/test/jacocoTestReport.xml, + TrafficCapture/captureOffloader/build/reports/jacoco/test/jacocoTestReport.xml, + TrafficCapture/KafkaPrinter/build/reports/jacoco/test/jacocoTestReport.xml, + TrafficCapture/nettyWireLogging/build/reports/jacoco/test/jacocoTestReport.xml, + TrafficCapture/trafficCaptureProxyServer/build/reports/jacoco/test/jacocoTestReport.xml, + TrafficCapture/trafficReplayer/build/reports/jacoco/test/jacocoTestReport.xml flags: unittests - fail_ci_if_error: true + fail_ci_if_error: false From 5b13c59eadebd1c71ebcd356ca907c73b3a9d2ae Mon Sep 17 00:00:00 2001 From: Tanner Lewis Date: Fri, 23 Jun 2023 13:59:00 -0400 Subject: [PATCH 43/57] Introduce Copilot Deployments (#201) * MIGRATIONS-1134: Add initial copilot setup Signed-off-by: Tanner Lewis --- README.md | 6 + TrafficCapture/dockerSolution/build.gradle | 7 +- .../opensearch-service-migration/README.md | 24 +- .../lib/migration-assistance-stack.ts | 260 +++++++++--------- deployment/copilot/.workspace | 1 + deployment/copilot/README.md | 107 +++++++ .../copilot/environments/test/manifest.yml | 26 ++ deployment/copilot/kafka-broker/manifest.yml | 58 ++++ .../copilot/kafka-puller/addons/taskRole.yml | 43 +++ deployment/copilot/kafka-puller/manifest.yml | 44 +++ .../copilot/kafka-zookeeper/manifest.yml | 40 +++ .../traffic-comparator-jupyter/manifest.yml | 67 +++++ .../copilot/traffic-comparator/manifest.yml | 53 ++++ .../copilot/traffic-replayer/manifest.yml | 43 +++ knowledge_base/README.md | 2 + 15 files changed, 643 insertions(+), 138 deletions(-) create mode 100644 deployment/copilot/.workspace create mode 100644 deployment/copilot/README.md create mode 100644 deployment/copilot/environments/test/manifest.yml create mode 100644 deployment/copilot/kafka-broker/manifest.yml create mode 100644 deployment/copilot/kafka-puller/addons/taskRole.yml create mode 100644 deployment/copilot/kafka-puller/manifest.yml create mode 100644 deployment/copilot/kafka-zookeeper/manifest.yml create mode 100644 deployment/copilot/traffic-comparator-jupyter/manifest.yml create mode 100644 deployment/copilot/traffic-comparator/manifest.yml create mode 100644 deployment/copilot/traffic-replayer/manifest.yml diff --git a/README.md b/README.md index 8d9125fed..3e4ec6ebb 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,12 @@ each and every test where additions made to the endpoints are deleted, *mostly* as we log all operations going through the proxy (which is capturing the traffic), those are only being deleted after the Docker solution is shut down. +## Deploying to AWS with Copilot + +The containerized services that this repo uses can be deployed to AWS with the use of [Copilot](https://aws.github.io/copilot-cli/) + +Documentation for getting started and deploying these services can be found [here](deployment/copilot/README.md) + ## Security See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 
diff --git a/TrafficCapture/dockerSolution/build.gradle b/TrafficCapture/dockerSolution/build.gradle index 5042bea38..5d77fd62b 100644 --- a/TrafficCapture/dockerSolution/build.gradle +++ b/TrafficCapture/dockerSolution/build.gradle @@ -76,7 +76,11 @@ trafficComparatorServices.forEach {projectName, dockerImageName -> dependsOn(tasks.getByName('cloneComparatorRepoIfNeeded')) from REALIZED_TRAFFIC_COMPARATOR_DIRECTORY into dockerBuildDir - include 'setup.py' + include '*.py' + include '/traffic_comparator/*' + if (projectName == 'jupyterNotebook') { + include '*.ipynb' + } } task "createDockerfile_${projectName}"(type: com.bmuschko.gradle.docker.tasks.image.Dockerfile) { @@ -85,6 +89,7 @@ trafficComparatorServices.forEach {projectName, dockerImageName -> from 'python:3.10.10' runCommand("apt-get update && apt-get install -y netcat lsof") copyFile("setup.py", "/setup.py") + copyFile(".", "/containerTC/") runCommand("pip3 install --editable \".[data]\"") // container stay-alive defaultCommand('tail', '-f', '/dev/null') diff --git a/deployment/cdk/opensearch-service-migration/README.md b/deployment/cdk/opensearch-service-migration/README.md index 136801d6f..6478ccc37 100644 --- a/deployment/cdk/opensearch-service-migration/README.md +++ b/deployment/cdk/opensearch-service-migration/README.md @@ -4,18 +4,34 @@ This repo contains an IaC CDK solution for deploying an OpenSearch Service Domai ### Getting Started -If this is your first time using CDK in this region, will need to `cdk bootstrap` to setup required CDK resources for deployment +#### First time using CDK? -Also ensure you have configured the desired [AWS credentials](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_prerequisites), as these will dictate the region and account used for deployment +You can install the CDK CLI tool by running: +``` +npm install -g aws-cdk +``` + +You then will need to configure the desired [AWS credentials](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_prerequisites), as these will dictate the region and account used for deployment. + +Next, if you have not run CDK previously in the configured region of your account, it is necessary to run the following command to set up a small CloudFormation stack of resources that CDK needs to function within your account +``` +cdk bootstrap +``` + +Further CDK documentation can be found [here](https://docs.aws.amazon.com/cdk/v2/guide/cli.html) + +#### Project required setup + +It is necessary to run `npm install` within this directory to install required packages that this app and CDK need for operation. -A `CDK_DEPLOYMENT_STAGE` environment variable should also be set to assist in naming resources and preventing collisions. Typically, this would be set to values such as `dev`, `gamma`, `Wave1`, `PROD` and will be used to distinguish AWS resources for a given region and deployment stage. For example the CloudFormation stack may be named like `OSServiceDomain-dev-us-east-1`. This stage environment variable should only be used for the disambiguation of user resources. +A `CDK_DEPLOYMENT_STAGE` environment variable must be set to assist in naming resources and preventing collisions. Typically, this would be set to a value such as `dev`, `gamma`, `Wave1`, `PROD` and will be used to distinguish AWS resources for a given region and deployment stage. For example the CloudFormation stack may be named like `OSServiceDomain-dev-us-east-1`.
This stage environment variable should only be used for the disambiguation of user resources. ### Deploying your CDK Before deploying your CDK you should fill in any desired context parameters that will dictate the composition of your OpenSearch Service Domain This can be accomplished by providing these options in a `cdk.context.json` file -As well as by passing the context options you want to change as options in the CDK CLI +Or by passing the context options you want to change as options in the CDK CLI ``` cdk deploy "*" --c domainName="os-service-domain" --c engineVersion="OS_1_3_6" --c dataNodeType="r6g.large.search" --c dataNodeCount=1 ``` diff --git a/deployment/cdk/opensearch-service-migration/lib/migration-assistance-stack.ts b/deployment/cdk/opensearch-service-migration/lib/migration-assistance-stack.ts index b29d190e8..518fc4d76 100644 --- a/deployment/cdk/opensearch-service-migration/lib/migration-assistance-stack.ts +++ b/deployment/cdk/opensearch-service-migration/lib/migration-assistance-stack.ts @@ -1,4 +1,4 @@ -import {Stack, StackProps} from "aws-cdk-lib"; +import {CfnOutput, Stack, StackProps} from "aws-cdk-lib"; import { Instance, InstanceClass, @@ -9,11 +9,9 @@ import { SecurityGroup, SubnetType } from "aws-cdk-lib/aws-ec2"; +import {FileSystem} from 'aws-cdk-lib/aws-efs'; import {Construct} from "constructs"; -import {Cluster, ContainerImage, FargateService, FargateTaskDefinition, LogDrivers} from "aws-cdk-lib/aws-ecs"; -import {DockerImageAsset} from "aws-cdk-lib/aws-ecr-assets"; -import {join} from "path"; -import {Effect, PolicyDocument, PolicyStatement, Role, ServicePrincipal} from "aws-cdk-lib/aws-iam"; +import {CfnCluster, CfnConfiguration} from "aws-cdk-lib/aws-msk"; export interface migrationStackProps extends StackProps { readonly vpc: IVpc, @@ -29,140 +27,119 @@ export class MigrationAssistanceStack extends Stack { constructor(scope: Construct, id: string, props: migrationStackProps) { super(scope, id, props); - // Create IAM policy to connect to cluster - const MSKConsumerPolicyConnect = new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["kafka-cluster:Connect", - "kafka-cluster:AlterCluster", - "kafka-cluster:DescribeCluster"], - resources: [props.MSKARN] + // Create MSK cluster config + const mskClusterConfig = new CfnConfiguration(this, "migrationMSKClusterConfig", { + name: 'migration-msk-config', + serverProperties: ` + auto.create.topics.enable=true + ` }) - // Create IAM policy to read/write from kafka topics on the cluster - const policyRegex1 = /([^//]+$)/gi - const policyRegex2 = /:(cluster)/gi - const policyShortARN = props.MSKARN.replace(policyRegex1, "*"); - const topicARN = policyShortARN.replace(policyRegex2, ":topic"); - - const MSKConsumerPolicyReadAndWrite = new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["kafka-cluster:*Topic*", - "kafka-cluster:WriteData", - "kafka-cluster:ReadData"], - resources: [topicARN] - }) - - // Create IAM policy to join Kafka consumer groups - let groupARN = policyShortARN.replace(policyRegex2, ":group"); - const MSKConsumerPolicyGroup = new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["kafka-cluster:AlterGroup", - "kafka-cluster:DescribeGroup"], - resources: [groupARN] - }) - - const MSKConsumerAccessDoc = new PolicyDocument({ - statements: [MSKConsumerPolicyConnect, MSKConsumerPolicyReadAndWrite, MSKConsumerPolicyGroup] - }) - - // Create IAM Role for Fargate Task to read from MSK Topic - const MSKConsumerRole = new Role(this, 'MSKConsumerRole', { - assumedBy: new 
ServicePrincipal('ecs-tasks.amazonaws.com'), - description: 'Allow Fargate container to consume from MSK', - inlinePolicies: { - ReadMSKTopic: MSKConsumerAccessDoc, + // Create an MSK cluster + const mskCluster = new CfnCluster(this, 'migrationMSKCluster', { + clusterName: 'migration-msk-cluster', + kafkaVersion: '2.8.1', + numberOfBrokerNodes: 2, + brokerNodeGroupInfo: { + instanceType: 'kafka.m5.large', + clientSubnets: props.vpc.selectSubnets({subnetType: SubnetType.PUBLIC}).subnetIds, + connectivityInfo: { + // Public access cannot be enabled on cluster creation + publicAccess: { + type: "DISABLED" + } + }, }, + configurationInfo: { + arn: mskClusterConfig.attrArn, + // This is temporary, need way to dynamically get latest + revision: 1 + }, + encryptionInfo: { + encryptionInTransit: { + clientBroker: 'TLS', + inCluster: true + }, + }, + enhancedMonitoring: 'DEFAULT', + clientAuthentication: { + sasl: { + iam: { + enabled: true + } + }, + unauthenticated: { + enabled: false + } + } }); + mskCluster.addDependency(mskClusterConfig) - const ecsCluster = new Cluster(this, "ecsMigrationCluster", { - vpc: props.vpc - }); - - const migrationFargateTask = new FargateTaskDefinition(this, "migrationFargateTask", { - memoryLimitMiB: 2048, - cpu: 512, - taskRole: MSKConsumerRole - }); - - // Create MSK Consumer Container - const MSKConsumerImage = new DockerImageAsset(this, "MSKConsumerImage", { - directory: join(__dirname, "../../../../TrafficCapture"), - file: join("kafkaPrinter/docker/Dockerfile") - }); - const MSKConsumerContainer = migrationFargateTask.addContainer("MSKConsumerContainer", { - image: ContainerImage.fromDockerImageAsset(MSKConsumerImage), - // Add in region and stage - containerName: "msk-consumer", - environment: {"KAFKA_BOOTSTRAP_SERVERS": props.MSKBrokers.toString(), - "KAFKA_TOPIC_NAME": props.MSKTopic}, - // portMappings: [{containerPort: 9210}], - logging: LogDrivers.awsLogs({ streamPrefix: 'msk-consumer-container-lg', logRetention: 30 }) - }); - - // Create Traffic Replayer Container - const trafficReplayerImage = new DockerImageAsset(this, "TrafficReplayerImage", { - directory: join(__dirname, "../../../../TrafficCapture"), - file: join("trafficReplayer/docker/Dockerfile") - }); - const trafficReplayerContainer = migrationFargateTask.addContainer("TrafficReplayerContainer", { - image: ContainerImage.fromDockerImageAsset(trafficReplayerImage), - // Add in region and stage - containerName: "traffic-replayer", - environment: {"TARGET_CLUSTER_ENDPOINT": "http://" + props.targetEndpoint + ":80"}, - logging: LogDrivers.awsLogs({ streamPrefix: 'traffic-replayer-container-lg', logRetention: 30 }) - }); - - // Create Traffic Comparator Container - const trafficComparatorImage = new DockerImageAsset(this, "TrafficComparatorImage", { - directory: join(__dirname, "../../..", "docker/traffic-comparator") - // For local traffic comparator usage, replace directory path with own fs path - //directory: "../../../../../mikayla-forks/traffic-comparator", - //file: "docker/Dockerfile-trafficcomparator" - }); - const trafficComparatorContainer = migrationFargateTask.addContainer("TrafficComparatorContainer", { - image: ContainerImage.fromDockerImageAsset(trafficComparatorImage), - // Add in region and stage - containerName: "traffic-comparator", - environment: {}, - // portMappings: [{containerPort: 9220}], - logging: LogDrivers.awsLogs({ streamPrefix: 'traffic-comparator-container-lg', logRetention: 30 }) - }); - - // To create Jupyter notebook container from local traffic comparator, 
replace directory path with own fs path - // const trafficComparatorJupyterImage = new DockerImageAsset(this, "TrafficComparatorJupyterImage", { - // directory: "../../../../../mikayla-forks/traffic-comparator", - // file: "docker/Dockerfile-trafficcomparator-jupyter" - // }); - // const trafficComparatorJupyterContainer = migrationFargateTask.addContainer("TrafficComparatorJupyterContainer", { - // image: ContainerImage.fromDockerImageAsset(trafficComparatorJupyterImage), - // // Add in region and stage - // containerName: "traffic-comparator-jupyter", - // environment: {}, - // portMappings: [{containerPort: 8888}], - // logging: LogDrivers.awsLogs({ streamPrefix: 'traffic-comparator-container-jupyter-lg', logRetention: 30 }) - // }); - // - // trafficComparatorContainer.addMountPoints({ - // containerPath: '/shared', - // sourceVolume: 'shared-traffic-comparator-volume', - // readOnly: false, - // }); - // trafficComparatorJupyterContainer.addMountPoints({ - // containerPath: '/shared', - // sourceVolume: 'shared-traffic-comparator-volume', - // readOnly: false, - // }); + // WIP Custom Resources to enable public endpoint for MSK and get the bootstrap broker urls, these may get + // combined into an actual lambda implementation in the future // - // // Mount the shared volume to the container - // migrationFargateTask.addVolume({ - // name: 'shared-traffic-comparator-volume', + // const crPolicyStatement = new PolicyStatement({ + // effect: Effect.ALLOW, + // actions: ["ec2:DescribeSubnets", + // "ec2:DescribeVpcs", + // "ec2:DescribeSecurityGroups", + // "ec2:DescribeRouteTables", + // "ec2:DescribeVpcEndpoints", + // "ec2:DescribeVpcAttribute", + // "ec2:DescribeNetworkAcls", + // "kafka:*"], + // resources: ["*"] + // }) + // const crPolicy = AwsCustomResourcePolicy.fromStatements([crPolicyStatement]) + // const mskPublicEndpointCustomResource = new AwsCustomResource(this, 'MigrationMSKPublicEndpointCR', { + // onCreate: { + // service: 'Kafka', + // action: 'updateConnectivity', + // parameters: { + // ClusterArn: mskCluster.attrArn, + // CurrentVersion: "K3P5ROKL5A1OLE", + // ConnectivityInfo: { + // PublicAccess: { + // Type: 'SERVICE_PROVIDED_EIPS' + // } + // } + // }, + // physicalResourceId: PhysicalResourceId.of(Date.now().toString()) + // }, + // policy: crPolicy, + // vpc: props.vpc + // }) + + // const mskPublicEndpointCustomResource = new AwsCustomResource(this, 'MigrationMSKPublicEndpointCR', { + // onCreate: { + // service: 'Kafka', + // action: 'getBootstrapBrokers', + // parameters: { + // ClusterArn: mskCluster.attrArn, + // Outputs: { + // CustomOutput: 'customOutputValue' + // } + // }, + // physicalResourceId: PhysicalResourceId.of(Date.now().toString()) + // }, + // policy: AwsCustomResourcePolicy.fromSdkCalls({resources: [mskCluster.attrArn]}), + // vpc: props.vpc + // }) + + // new CfnOutput(this, 'MSKBrokerOutput', { + // value: mskPublicEndpointCustomResource.getResponseField("Outputs.CustomOutput") // }); - // Create Fargate Service - const migrationFargateService = new FargateService(this, "migrationFargateService", { - cluster: ecsCluster, - taskDefinition: migrationFargateTask, - desiredCount: 1 + const comparatorSQLiteSG = new SecurityGroup(this, 'comparatorSQLiteSG', { + vpc: props.vpc, + allowAllOutbound: true, + }); + comparatorSQLiteSG.addIngressRule(comparatorSQLiteSG, Port.allTraffic()); + + // Create an EFS file system for the traffic-comparator + const comparatorSQLiteEFS = new FileSystem(this, 'comparatorSQLiteEFS', { + vpc: props.vpc, + 
securityGroup: comparatorSQLiteSG }); // Creates a security group with open access via ssh @@ -179,8 +156,25 @@ export class MigrationAssistanceStack extends Stack { instanceType: InstanceType.of(InstanceClass.T2, InstanceSize.MICRO), machineImage: MachineImage.latestAmazonLinux(), securityGroup: oinoSecurityGroup, - // Manually created for now, to be automated soon - keyName: "es-node-key" + // Manually created for now, to be automated in future + //keyName: "es-node-key" + }); + + // This is a temporary fragile piece to help with importing values from CDK to Copilot. It assumes the provided VPC has two public subnets and + // does not currently provide the MSK broker endpoints as a future Custom Resource is needed to accomplish this + const exports = [ + `export MIGRATION_VPC_ID=${props.vpc.vpcId}`, + `export MIGRATION_PUBLIC_SUBNET_1=${props.vpc.publicSubnets[0].subnetId}`, + `export MIGRATION_PUBLIC_SUBNET_2=${props.vpc.publicSubnets[1].subnetId}`, + `export MIGRATION_DOMAIN_ENDPOINT=${props.targetEndpoint}`, + `export MIGRATION_COMPARATOR_EFS_ID=${comparatorSQLiteEFS.fileSystemId}`, + `export MIGRATION_COMPARATOR_EFS_SG_ID=${comparatorSQLiteSG.securityGroupId}`, + `export MIGRATION_KAFKA_BROKER_ENDPOINTS=`] + + const cfnOutput = new CfnOutput(this, 'CopilotExports', { + value: exports.join(";"), + description: 'Exported resource values created by CDK that are needed by Copilot container deployments', }); + } } \ No newline at end of file diff --git a/deployment/copilot/.workspace b/deployment/copilot/.workspace new file mode 100644 index 000000000..c6b1e4053 --- /dev/null +++ b/deployment/copilot/.workspace @@ -0,0 +1 @@ +application: migration-copilot diff --git a/deployment/copilot/README.md b/deployment/copilot/README.md new file mode 100644 index 000000000..7b4fa7512 --- /dev/null +++ b/deployment/copilot/README.md @@ -0,0 +1,107 @@ +### Copilot Deployment +Copilot is a tool for deploying containerized applications on AWS ECS. Official documentation can be found [here](https://aws.github.io/copilot-cli/docs/overview/). + +### Initial Setup + +#### Install Prerequisites + +###### Docker +Docker is used by Copilot to build container images. If not installed, follow the steps [here](https://docs.docker.com/engine/install/) to set up. Later versions are recommended. +###### Git +Git is used by the opensearch-migrations repo to fetch associated repositories (such as the traffic-comparator repo) for constructing their respective Dockerfiles. Steps to set up can be found [here](https://github.com/git-guides/install-git). +###### Java 11 +Java is used by the opensearch-migrations repo and Gradle, its associated build tool. The current required version is Java 11. + +#### Creating Dockerfiles +This project needs to build the required Dockerfiles that Copilot will use in its services. 
From the `TrafficCapture` directory, the following command can be run to build these files +``` +./gradlew :dockerSolution:buildDockerImages +``` +More details can be found [here](../../TrafficCapture/dockerSolution/README.md) + +#### Setting up Copilot CLI +If you are on Mac, the following Homebrew command can be run to set up the Copilot CLI: +``` +brew install aws/tap/copilot-cli +``` +Otherwise, please follow the manual instructions [here](https://aws.github.io/copilot-cli/docs/getting-started/install/) + + +#### Importing values from CDK +The typical use case for this Copilot app is to initially use the `opensearch-service-migration` CDK to deploy the surrounding infrastructure (VPC, OpenSearch Domain, Managed Kafka (MSK)) that Copilot requires, and then deploy the desired Copilot services. Documentation for setting up and deploying these resources can be found in the CDK [README](../cdk/opensearch-service-migration/README.md). + +The provided CDK will output export commands once deployed that can be run on a given deployment machine to meet the required environment variables this Copilot app uses: +``` +export MIGRATION_VPC_ID=vpc-123; +export MIGRATION_PUBLIC_SUBNET_1=subnet-123; +export MIGRATION_PUBLIC_SUBNET_2=subnet-124; +export MIGRATION_DOMAIN_ENDPOINT=vpc-aos-domain-123.us-east-1.es.amazonaws.com; +export MIGRATION_COMPARATOR_EFS_ID=fs-123; +export MIGRATION_COMPARATOR_EFS_SG_ID=sg-123; +export MIGRATION_KAFKA_BROKER_ENDPOINTS=b-1-public.loggingmskcluster.123.45.kafka.us-east-1.amazonaws.com:9198,b-2-public.loggingmskcluster.123.46.kafka.us-east-1.amazonaws.com:9198 +``` + +#### Setting up existing Copilot infrastructure + +It is **important** to run any `copilot` commands from within this directory (`deployment/copilot`). When components are initialized, the name given will be searched for in the immediate directory structure to look for an existing `manifest.yml` for that component. If found, it will use the existing manifest and not create its own. This Copilot app already has existing manifests for each of its services and a test environment, which should be used for proper operation. + +When initially setting up Copilot, each component (apps, services, and environments) needs to be initialized. Beware that when initializing an environment in Copilot, it will prompt you for values even if you've defined them in the `manifest.yml`, though values input at the prompt are ignored in favor of what was specified in the file. + +If using temporary environment credentials when initializing an environment: +* Copilot will prompt you to enter each variable (AWS Access Key ID, AWS Secret Access Key, AWS Session Token). If these variables are already available in your environment, these three prompts can simply be skipped by pressing `enter`. +* When prompted ` Would you like to use the default configuration for a new environment?` select `Yes, use default.` as this will ultimately be ignored in favor of what has been configured in the existing `manifest.yml` +* The last prompt will ask for the desired deployment region and should be filled out, as Copilot will store this internally. + +**Note**: This app also contains `kafka-broker` and `kafka-zookeeper` services which are currently experimental, and usage of MSK is preferred. These services do not need to be deployed, and so are not listed below.
+``` +// Initialize app +copilot app init + +// Initialize env with required "test" name +// Be cautious to specify the proper region as this will dictate where resources are deployed +copilot env init --name test + +// Initialize services with their respective required name +copilot svc init --name kafka-puller +copilot svc init --name traffic-replayer +copilot svc init --name traffic-comparator +copilot svc init --name traffic-comparator-jupyter + +``` + +### Deploying Services to an Environment +When deploying a service with the Copilot CLI, a status bar will be displayed that gets updated as the deployment progresses. The command will complete when the specific service has all its resources created and health checks are passing on the deployed containers. + +Currently, it seems that Copilot does not support deploying all services at once (issue [here](https://github.com/aws/copilot-cli/issues/3474)) or creating dependencies between separate services. In light of this, services need to be deployed one at a time as shown below. + +``` +// Deploy environment +copilot env deploy --name test + +// Deploy services to a deployed environment +copilot svc deploy --name traffic-comparator-jupyter --env test +copilot svc deploy --name traffic-comparator --env test +copilot svc deploy --name traffic-replayer --env test +copilot svc deploy --name kafka-puller --env test +``` + +### Executing Commands on a Deployed Service + +A command shell can be opened in the service's container if that service has enabled `exec: true` in its `manifest.yml` and the SSM Session Manager plugin is installed when prompted. +``` +copilot svc exec traffic-comparator-jupyter --container traffic-comparator-jupyter --command "bash" +copilot svc exec traffic-comparator --container traffic-comparator --command "bash" +copilot svc exec traffic-replayer --container traffic-replayer --command "bash" +copilot svc exec kafka-puller --container kafka-puller --command "bash" +``` + +### Addons + +Addons are a Copilot concept for adding additional AWS resources outside the core ECS resources that it sets up. An example of this can be seen in the [kafka-puller](kafka-puller/addons/taskRole.yml) service which has an `addons` directory and YAML file which adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. + +Official documentation on Addons can be found [here](https://aws.github.io/copilot-cli/docs/developing/addons/workload/). + +### Useful Commands + +`copilot app show`: Provides details on the current app \ +`copilot svc show`: Provides details on a particular service diff --git a/deployment/copilot/environments/test/manifest.yml b/deployment/copilot/environments/test/manifest.yml new file mode 100644 index 000000000..f97c5a3fb --- /dev/null +++ b/deployment/copilot/environments/test/manifest.yml @@ -0,0 +1,26 @@ +# The manifest for the "test" environment. +# Read the full specification for the "Environment" type at: +# https://aws.github.io/copilot-cli/docs/manifest/environment/ + +# Your environment name will be used in naming your resources like VPC, cluster, etc. +name: test +type: Environment + +# Import your own VPC and subnets or configure how they should be created. +network: + vpc: + id: ${MIGRATION_VPC_ID} + subnets: + public: + - id: ${MIGRATION_PUBLIC_SUBNET_1} + - id: ${MIGRATION_PUBLIC_SUBNET_2} + + +# Configure the load balancers in your environment, once created.
+# http: +# public: +# private: + +# Configure observability for your environment resources. +observability: + container_insights: false diff --git a/deployment/copilot/kafka-broker/manifest.yml b/deployment/copilot/kafka-broker/manifest.yml new file mode 100644 index 000000000..f58a0d7a3 --- /dev/null +++ b/deployment/copilot/kafka-broker/manifest.yml @@ -0,0 +1,58 @@ +# The manifest for the "kafka-broker" service. +# Read the full specification for the "Load Balanced Web Service" type at: +# https://aws.github.io/copilot-cli/docs/manifest/lb-web-service/ + +# This is a non-essential experimental service to test running Kafka within ECS. It has no volume, is a single broker, and will be recreated in +# test environment deployments. With this in mind, it should only be used on a test basis. + +# Your service name will be used in naming your resources like log groups, ECS services, etc. +name: kafka-broker +type: Load Balanced Web Service + +## Distribute traffic to your service. +http: false +nlb: + port: 9092 + +# Allow service-to-service communication with ECS Service Connect +network: + connect: true + +# Configuration for your containers and service. +image: + location: docker.io/bitnami/kafka:3.4 + # Port exposed through your container to route traffic to it. + port: 9092 + +cpu: 256 # Number of CPU units for the task. +memory: 2048 # Amount of memory in MiB used by the task. +count: 1 # Number of tasks that should be running in your service. +exec: true # Enable getting a shell to your container (https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html). + +# storage: + # readonly_fs: true # Limit to read-only access to mounted root filesystems. + +# Optional fields for more advanced use-cases. +# +variables: # Pass environment variables as key value pairs. + # Definitions for some of these variables can be found in the Bitnami docker documentation here: https://hub.docker.com/r/bitnami/kafka/ + ALLOW_PLAINTEXT_LISTENER: yes + KAFKA_ENABLE_KRAFT: no + KAFKA_ZOOKEEPER_CONNECT: kafka-zookeeper:2181 + # Interfaces that Kafka binds to + KAFKA_LISTENERS: PLAINTEXT://:9092 + # Define the protocol to use per listener name + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT + # Metadata passed back to clients, that they will use to connect to brokers + # This is currently only accessible within an ECS service that can connect to kafka-broker + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-broker:9092 + +#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store. +# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter. + +# You can override any of the values defined above by environment. +environments: + test: + count: 1 # Number of tasks to run for the "test" environment. + deployment: # The deployment strategy for the "test" environment. + rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments. \ No newline at end of file diff --git a/deployment/copilot/kafka-puller/addons/taskRole.yml b/deployment/copilot/kafka-puller/addons/taskRole.yml new file mode 100644 index 000000000..d669110ca --- /dev/null +++ b/deployment/copilot/kafka-puller/addons/taskRole.yml @@ -0,0 +1,43 @@ +# You can use any of these parameters to create conditions or mappings in your template. +Parameters: + App: + Type: String + Description: Your application's name. + Env: + Type: String + Description: The environment name your service, job, or workflow is being deployed to. 
+ Name: + Type: String + Description: Your workload's name. + +Resources: + MSKConsumerAccessPolicy: + Type: AWS::IAM::ManagedPolicy + Properties: + Description: Allow Fargate container to consume from MSK + PolicyDocument: + Version: '2012-10-17' + # We should enhance IAM policy here to further restrict the Resource + Statement: + - Action: + - kafka-cluster:AlterCluster + - kafka-cluster:Connect + - kafka-cluster:DescribeCluster + Effect: Allow + Resource: "*" + - Action: + - kafka-cluster:*Topic* + - kafka-cluster:ReadData + Effect: Allow + Resource: "*" + - Action: + - kafka-cluster:AlterGroup + - kafka-cluster:DescribeGroup + Effect: Allow + Resource: "*" + +Outputs: + # 1. You need to output the IAM ManagedPolicy so that Copilot can add it as a managed policy to your ECS task role. + MSKConsumerAccessPolicyArn: + Description: "The ARN of the ManagedPolicy to attach to the task role." + Value: !Ref MSKConsumerAccessPolicy \ No newline at end of file diff --git a/deployment/copilot/kafka-puller/manifest.yml b/deployment/copilot/kafka-puller/manifest.yml new file mode 100644 index 000000000..957e8611a --- /dev/null +++ b/deployment/copilot/kafka-puller/manifest.yml @@ -0,0 +1,44 @@ +# The manifest for the "kafka-puller" service. +# Read the full specification for the "Backend Service" type at: +# https://aws.github.io/copilot-cli/docs/manifest/backend-service/ + +# Your service name will be used in naming your resources like log groups, ECS services, etc. +name: kafka-puller +type: Backend Service + +# Allow service-to-service communication with ECS Service Connect +network: + connect: true + +# Configuration for your containers and service. +image: + # Docker build arguments. For additional overrides: https://aws.github.io/copilot-cli/docs/manifest/backend-service/#image-build + build: + dockerfile: ../TrafficCapture/dockerSolution/build/docker/kafkaPrinter/Dockerfile + # This port is not currently used, but having a port specified was a requirement for using Service Connect + port: 9209 + +command: /bin/sh -c "/runJavaWithClasspath.sh org.opensearch.migrations.KafkaPrinter --group-id default-logging-group --broker-address ${MIGRATION_KAFKA_BROKER_ENDPOINTS} --topic-name logging-traffic-topic | nc -v traffic-replayer 10001" + +cpu: 256 # Number of CPU units for the task. +memory: 512 # Amount of memory in MiB used by the task. +count: 1 # Number of tasks that should be running in your service. +exec: true # Enable getting a shell to your container (https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html). + +# storage: + # readonly_fs: true # Limit to read-only access to mounted root filesystems. + +# Optional fields for more advanced use-cases. +# +#variables: # Pass environment variables as key value pairs. +# LOG_LEVEL: info + +#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store. +# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter. + +# You can override any of the values defined above by environment. +environments: + test: + count: 1 # Number of tasks to run for the "test" environment. + deployment: # The deployment strategy for the "test" environment. + rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments. 
\ No newline at end of file diff --git a/deployment/copilot/kafka-zookeeper/manifest.yml b/deployment/copilot/kafka-zookeeper/manifest.yml new file mode 100644 index 000000000..ae4f65dd3 --- /dev/null +++ b/deployment/copilot/kafka-zookeeper/manifest.yml @@ -0,0 +1,40 @@ +# The manifest for the "kafka-zookeeper" service. +# Read the full specification for the "Backend Service" type at: +# https://aws.github.io/copilot-cli/docs/manifest/backend-service/ + +# Your service name will be used in naming your resources like log groups, ECS services, etc. +name: kafka-zookeeper +type: Backend Service + +# Allow service-to-service communication with ECS Service Connect +network: + connect: true + +# Configuration for your containers and service. +image: + location: docker.io/bitnami/zookeeper:3.8 + port: 2181 + +cpu: 256 # Number of CPU units for the task. +memory: 512 # Amount of memory in MiB used by the task. +count: 1 # Number of tasks that should be running in your service. +exec: true # Enable getting a shell to your container (https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html). + +variables: # Pass environment variables as key value pairs. + ALLOW_ANONYMOUS_LOGIN: yes + + +# storage: + # readonly_fs: true # Limit to read-only access to mounted root filesystems. + +# Optional fields for more advanced use-cases. +# +#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store. +# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter. + +# You can override any of the values defined above by environment. +environments: + test: + count: 1 # Number of tasks to run for the "test" environment. + deployment: # The deployment strategy for the "test" environment. + rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments. \ No newline at end of file diff --git a/deployment/copilot/traffic-comparator-jupyter/manifest.yml b/deployment/copilot/traffic-comparator-jupyter/manifest.yml new file mode 100644 index 000000000..4ad5ea765 --- /dev/null +++ b/deployment/copilot/traffic-comparator-jupyter/manifest.yml @@ -0,0 +1,67 @@ +# The manifest for the "traffic-comparator-jupyter" service. +# Read the full specification for the "Load Balanced Web Service" type at: +# https://aws.github.io/copilot-cli/docs/manifest/lb-web-service/ + +# Your service name will be used in naming your resources like log groups, ECS services, etc. +name: traffic-comparator-jupyter +type: Load Balanced Web Service + +# Distribute traffic to your service. +http: + # Requests to this path will be forwarded to your service. + # To match all requests you can use the "/" path. + path: '/' + # Bogus health check to pass startup + healthcheck: + path: '/' + port: 8888 + success_codes: '200-405' + healthy_threshold: 3 + unhealthy_threshold: 2 + interval: 15s + timeout: 10s + grace_period: 60s + +network: + vpc: + security_groups: [ "${MIGRATION_COMPARATOR_EFS_SG_ID}" ] + +# Configuration for your containers and service. +image: + # Docker build arguments. 
For additional overrides: https://aws.github.io/copilot-cli/docs/manifest/backend-service/#image-build + build: + dockerfile: ../TrafficCapture/dockerSolution/build/docker/jupyterNotebook/Dockerfile + port: 8888 + +command: /bin/sh -c 'cd containerTC && pip3 install --editable ".[data]" && jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser --allow-root' + +storage: + volumes: + sharedSQLVolume: # This is a variable key and can be set to an arbitrary string. + path: '/shared' + read_only: false + efs: + id: ${MIGRATION_COMPARATOR_EFS_ID} + +cpu: 256 # Number of CPU units for the task. +memory: 512 # Amount of memory in MiB used by the task. +count: 1 # Number of tasks that should be running in your service. +exec: true # Enable getting a shell to your container (https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html). + +# storage: + # readonly_fs: true # Limit to read-only access to mounted root filesystems. + +# Optional fields for more advanced use-cases. +# +#variables: # Pass environment variables as key value pairs. +# LOG_LEVEL: info + +#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store. +# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter. + +# You can override any of the values defined above by environment. +environments: + test: + count: 1 # Number of tasks to run for the "test" environment. + deployment: # The deployment strategy for the "test" environment. + rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments. \ No newline at end of file diff --git a/deployment/copilot/traffic-comparator/manifest.yml b/deployment/copilot/traffic-comparator/manifest.yml new file mode 100644 index 000000000..87e41039f --- /dev/null +++ b/deployment/copilot/traffic-comparator/manifest.yml @@ -0,0 +1,53 @@ +# The manifest for the "traffic-comparator" service. +# Read the full specification for the "Backend Service" type at: +# https://aws.github.io/copilot-cli/docs/manifest/backend-service/ + +# Your service name will be used in naming your resources like log groups, ECS services, etc. +name: traffic-comparator +type: Backend Service + +# Allow service-to-service communication with ECS Service Connect +network: + connect: true + vpc: + security_groups: [ "${MIGRATION_COMPARATOR_EFS_SG_ID}" ] + +# Configuration for your containers and service. +image: + # Docker build arguments. For additional overrides: https://aws.github.io/copilot-cli/docs/manifest/backend-service/#image-build + build: + dockerfile: ../TrafficCapture/dockerSolution/build/docker/trafficComparator/Dockerfile + port: 9220 + +command: /bin/sh -c "cd containerTC && pip3 install --editable . && nc -v -l -p 9220 | tee /dev/stderr | trafficcomparator -vv stream | trafficcomparator dump-to-sqlite --db /shared/comparisons.db" + +storage: + volumes: + sharedSQLVolume: # This is a variable key and can be set to an arbitrary string. + path: '/shared' + read_only: false + efs: + id: ${MIGRATION_COMPARATOR_EFS_ID} + +cpu: 256 # Number of CPU units for the task. +memory: 512 # Amount of memory in MiB used by the task. +count: 1 # Number of tasks that should be running in your service. +exec: true # Enable getting a shell to your container (https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html). + +# storage: + # readonly_fs: true # Limit to read-only access to mounted root filesystems. + +# Optional fields for more advanced use-cases. 
+# +#variables: # Pass environment variables as key value pairs. +# LOG_LEVEL: info + +#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store. +# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter. + +# You can override any of the values defined above by environment. +environments: + test: + count: 1 # Number of tasks to run for the "test" environment. + deployment: # The deployment strategy for the "test" environment. + rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments. \ No newline at end of file diff --git a/deployment/copilot/traffic-replayer/manifest.yml b/deployment/copilot/traffic-replayer/manifest.yml new file mode 100644 index 000000000..40bdd1dd1 --- /dev/null +++ b/deployment/copilot/traffic-replayer/manifest.yml @@ -0,0 +1,43 @@ +# The manifest for the "traffic-replayer" service. +# Read the full specification for the "Backend Service" type at: +# https://aws.github.io/copilot-cli/docs/manifest/backend-service/ + +# Your service name will be used in naming your resources like log groups, ECS services, etc. +name: traffic-replayer +type: Backend Service + +# Allow service-to-service communication with ECS Service Connect +network: + connect: true + +# Configuration for your containers and service. +image: + # Docker build arguments. For additional overrides: https://aws.github.io/copilot-cli/docs/manifest/backend-service/#image-build + build: + dockerfile: ../TrafficCapture/dockerSolution/build/docker/trafficReplayer/Dockerfile + port: 10001 + +command: /bin/sh -c "nc -v -l -p 10001 | /runJavaWithClasspath.sh org.opensearch.migrations.replay.TrafficReplayer http://vpc-aos-domain-chxj2uiujn2bbwsc5avkvzff2m.us-east-1.es.amazonaws.com:80 --insecure | nc traffic-comparator 9220" + +cpu: 256 # Number of CPU units for the task. +memory: 512 # Amount of memory in MiB used by the task. +count: 1 # Number of tasks that should be running in your service. +exec: true # Enable getting a shell to your container (https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html). + +# storage: + # readonly_fs: true # Limit to read-only access to mounted root filesystems. + +# Optional fields for more advanced use-cases. +# +#variables: # Pass environment variables as key value pairs. +# LOG_LEVEL: info + +#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store. +# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter. + +# You can override any of the values defined above by environment. +environments: + test: + count: 1 # Number of tasks to run for the "test" environment. + deployment: # The deployment strategy for the "test" environment. + rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments. \ No newline at end of file diff --git a/knowledge_base/README.md b/knowledge_base/README.md index 9370258df..5af75e6d4 100644 --- a/knowledge_base/README.md +++ b/knowledge_base/README.md @@ -3,3 +3,5 @@ The Knowledge Base is the collection of expectations we have about the behavior of clusters of various versions and across upgrades. It can be used in various ways, but was designed to be a component of the Upgrade Testing Framework. There is more extensive documentation of how it works in [upgrades/README.md](../upgrades/README.md). 
+ +Further discussion on adding additional expectations to this knowledge base, can be found in the PR [here](https://github.com/opensearch-project/opensearch-migrations/pull/68) From 646696ca6815b019ec4003ba437b09b994d65fbd Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 26 Jun 2023 10:50:03 -0500 Subject: [PATCH 44/57] renaming workflow + no longer double executing tests Signed-off-by: Omar Khasawneh --- .github/workflows/{gradle.yml => gradle-build-and-test.yml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{gradle.yml => gradle-build-and-test.yml} (92%) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle-build-and-test.yml similarity index 92% rename from .github/workflows/gradle.yml rename to .github/workflows/gradle-build-and-test.yml index 5fe7bdea4..da0d24c10 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle-build-and-test.yml @@ -16,11 +16,11 @@ jobs: distribution: 'adopt' - name: Run Gradle Build - run: ./gradlew build + run: ./gradlew build -x test working-directory: TrafficCapture - name: Run Tests with Coverage - run: ./gradlew test jacocoTestReport + run: ./gradlew test jacocoTestReport --info working-directory: TrafficCapture - name: Upload to Codecov From 7ff72cec1020aace76bbe366cfdbdb9a82d6d0e5 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 26 Jun 2023 14:34:24 -0500 Subject: [PATCH 45/57] removing finalized Signed-off-by: Omar Khasawneh --- TrafficCapture/build.gradle | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/TrafficCapture/build.gradle b/TrafficCapture/build.gradle index a349b1cc5..ccefe8896 100644 --- a/TrafficCapture/build.gradle +++ b/TrafficCapture/build.gradle @@ -18,7 +18,4 @@ allprojects { } } } - -test { - finalizedBy 'jacocoTestReport' -} +TrafficCapture/*/build/reports/jacoco/test/jacocoTestReport.xml From 437444ed47f7a31795240517c6e7953fef5adc4a Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 26 Jun 2023 15:05:53 -0500 Subject: [PATCH 46/57] Using wildcards to add coverage report files instead of adding individual files Signed-off-by: Omar Khasawneh --- .github/workflows/gradle-build-and-test.yml | 11 +++-------- TrafficCapture/build.gradle | 1 - 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/gradle-build-and-test.yml b/.github/workflows/gradle-build-and-test.yml index da0d24c10..6fb1d5bfc 100644 --- a/.github/workflows/gradle-build-and-test.yml +++ b/.github/workflows/gradle-build-and-test.yml @@ -3,7 +3,7 @@ name: Gradle Build and Test on: [push, pull_request] jobs: - build: + gradle-build-and-test: runs-on: ubuntu-latest @@ -26,11 +26,6 @@ jobs: - name: Upload to Codecov uses: codecov/codecov-action@v3 with: - files: TrafficCapture/captureKafkaOffloader/build/reports/jacoco/test/jacocoTestReport.xml, - TrafficCapture/captureOffloader/build/reports/jacoco/test/jacocoTestReport.xml, - TrafficCapture/KafkaPrinter/build/reports/jacoco/test/jacocoTestReport.xml, - TrafficCapture/nettyWireLogging/build/reports/jacoco/test/jacocoTestReport.xml, - TrafficCapture/trafficCaptureProxyServer/build/reports/jacoco/test/jacocoTestReport.xml, - TrafficCapture/trafficReplayer/build/reports/jacoco/test/jacocoTestReport.xml + files: "TrafficCapture/**/jacocoTestReport.xml" flags: unittests - fail_ci_if_error: false + fail_ci_if_error: false \ No newline at end of file diff --git a/TrafficCapture/build.gradle b/TrafficCapture/build.gradle index ccefe8896..2b2c417a1 100644 --- a/TrafficCapture/build.gradle +++ 
b/TrafficCapture/build.gradle @@ -18,4 +18,3 @@ allprojects { } } } -TrafficCapture/*/build/reports/jacoco/test/jacocoTestReport.xml From 62626dd391a568e345a990ac74249b3486e1574c Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 26 Jun 2023 15:18:27 -0500 Subject: [PATCH 47/57] Disabling python related workflows for non-python directories Signed-off-by: Omar Khasawneh --- .github/workflows/CI.yml | 9 ++++++++- .github/workflows/python-tests.yml | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 118dc0b3b..e5e3019fe 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,6 +1,13 @@ name: CI -on: [push, pull_request] +on: + [push, pull_request]: + paths: + - 'cluster_migration_core/**' + - 'cluster_traffic_capture/**' + - 'index_configuration_tool/**' + - 'test/**' + - 'upgrades/**' jobs: lint: diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index c32f40b02..1e418b338 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -1,6 +1,13 @@ name: python-tests -on: [push, pull_request] +on: + [push, pull_request]: + paths: + - 'cluster_migration_core/**' + - 'cluster_traffic_capture/**' + - 'index_configuration_tool/**' + - 'test/**' + - 'upgrades/**' jobs: test-linux: From fc40567eb2006ea54335628c570806f69dba0fd3 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 26 Jun 2023 16:15:22 -0500 Subject: [PATCH 48/57] Migrations-1150 - Enhance Setting Kafka Properties for Kafka Puller Signed-off-by: Omar Khasawneh --- TrafficCapture/kafkaPrinter/kafka.properties | 4 ++ .../opensearch/migrations/KafkaPrinter.java | 41 +++++++++++++++---- 2 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 TrafficCapture/kafkaPrinter/kafka.properties diff --git a/TrafficCapture/kafkaPrinter/kafka.properties b/TrafficCapture/kafkaPrinter/kafka.properties new file mode 100644 index 000000000..52249f8a0 --- /dev/null +++ b/TrafficCapture/kafkaPrinter/kafka.properties @@ -0,0 +1,4 @@ +# kafka properties +key.deserializer=org.apache.kafka.common.serialization.StringDeserializer +value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer +auto.offset.reset=earliest diff --git a/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java b/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java index 7ceb46fd6..817f06250 100644 --- a/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java +++ b/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java @@ -13,6 +13,8 @@ import java.io.IOException; import java.io.OutputStream; +import java.io.InputStream; +import java.io.FileInputStream; import java.time.Duration; import java.util.Collections; import java.util.Properties; @@ -36,6 +38,15 @@ static class Parameters { names = {"-g", "--group-id"}, description = "Client id that should be used when communicating with the Kafka broker.") String clientGroupId; + @Parameter(required = false, + names = {"-m", "--enable-msk-client"}, + description = "Enables properties required for connecting to an MSK public endpoint.") + Boolean isMSKPublic = false; + @Parameter(required = false, + names = {"--kafkaConfigFile"}, + arity = 1, + description = "Kafka properties file") + String kafkaPropertiesFile; } public static Parameters parseArgs(String[] args) { @@ -65,16 +76,28 @@ public static void main(String[] 
args) { String topic = params.topicName; Properties properties = new Properties(); - properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); - properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); - properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); - properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId); - properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + if (params.kafkaPropertiesFile != null) { + try (InputStream input = new FileInputStream(params.kafkaPropertiesFile)) { + properties.load(input); + } catch (IOException ex) { + log.error("Unable to load properties from kafka.properties file."); + return; + } + } + else { + properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); + properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); + properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId); + properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + } // Required for using SASL auth with MSK public endpoint - //properties.setProperty("security.protocol", "SASL_SSL"); - //properties.setProperty("sasl.mechanism", "AWS_MSK_IAM"); - //properties.setProperty("sasl.jaas.config", "software.amazon.msk.auth.iam.IAMLoginModule required;"); - //properties.setProperty("sasl.client.callback.handler.class", "software.amazon.msk.auth.iam.IAMClientCallbackHandler"); + if (params.isMSKPublic){ + properties.setProperty("security.protocol", "SASL_SSL"); + properties.setProperty("sasl.mechanism", "AWS_MSK_IAM"); + properties.setProperty("sasl.jaas.config", "software.amazon.msk.auth.iam.IAMLoginModule required;"); + properties.setProperty("sasl.client.callback.handler.class", "software.amazon.msk.auth.iam.IAMClientCallbackHandler"); + } KafkaConsumer consumer = new KafkaConsumer<>(properties); From d16b06a02501e74c682a78f6d8439e6b96a00d15 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Mon, 26 Jun 2023 16:25:34 -0500 Subject: [PATCH 49/57] Removing accidentally included kafka.properties file Signed-off-by: Omar Khasawneh --- TrafficCapture/kafkaPrinter/kafka.properties | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 TrafficCapture/kafkaPrinter/kafka.properties diff --git a/TrafficCapture/kafkaPrinter/kafka.properties b/TrafficCapture/kafkaPrinter/kafka.properties deleted file mode 100644 index 52249f8a0..000000000 --- a/TrafficCapture/kafkaPrinter/kafka.properties +++ /dev/null @@ -1,4 +0,0 @@ -# kafka properties -key.deserializer=org.apache.kafka.common.serialization.StringDeserializer -value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer -auto.offset.reset=earliest From 8db609bb9521b37ce2d82c87b487babeb353f177 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Tue, 27 Jun 2023 10:18:00 -0500 Subject: [PATCH 50/57] run lint on all .py changes Signed-off-by: Omar Khasawneh --- .github/workflows/CI.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index e5e3019fe..1282d1a34 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,13 +1,12 @@ name: CI on: - [push, pull_request]: + push: paths: - - 'cluster_migration_core/**' - - 'cluster_traffic_capture/**' - - 
'index_configuration_tool/**' - - 'test/**' - - 'upgrades/**' + - '**.py' + pull_request: + paths: + - '**.py' jobs: lint: From 42d2e957344b6289880c175e0d8e9642736932de Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Tue, 27 Jun 2023 10:22:33 -0500 Subject: [PATCH 51/57] run on any .py changes within the top cluster_migration_core directory Signed-off-by: Omar Khasawneh --- .github/workflows/python-tests.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 1e418b338..367156858 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -1,13 +1,12 @@ name: python-tests on: - [push, pull_request]: + push: paths: - - 'cluster_migration_core/**' - - 'cluster_traffic_capture/**' - - 'index_configuration_tool/**' - - 'test/**' - - 'upgrades/**' + - 'cluster_migration_core/**.py' + pull_request: + paths: + - 'cluster_migration_core/**.py' jobs: test-linux: From f535bb26cfde4bd74230b2d202781359d3fad5ad Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Tue, 27 Jun 2023 10:37:47 -0500 Subject: [PATCH 52/57] changing argument and variable names + updating description Signed-off-by: Omar Khasawneh --- .../main/java/org/opensearch/migrations/KafkaPrinter.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java b/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java index 817f06250..28e45a6f8 100644 --- a/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java +++ b/TrafficCapture/kafkaPrinter/src/main/java/org/opensearch/migrations/KafkaPrinter.java @@ -39,9 +39,9 @@ static class Parameters { description = "Client id that should be used when communicating with the Kafka broker.") String clientGroupId; @Parameter(required = false, - names = {"-m", "--enable-msk-client"}, - description = "Enables properties required for connecting to an MSK public endpoint.") - Boolean isMSKPublic = false; + names = {"--enableMSKAuth"}, + description = "Enables SASL properties required for connecting to MSK with IAM auth.") + boolean mskAuthEnabled = false; @Parameter(required = false, names = {"--kafkaConfigFile"}, arity = 1, @@ -92,7 +92,7 @@ public static void main(String[] args) { properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); } // Required for using SASL auth with MSK public endpoint - if (params.isMSKPublic){ + if (params.mskAuthEnabled){ properties.setProperty("security.protocol", "SASL_SSL"); properties.setProperty("sasl.mechanism", "AWS_MSK_IAM"); properties.setProperty("sasl.jaas.config", "software.amazon.msk.auth.iam.IAMLoginModule required;"); From 3427af3332fc8e184440343fe19d2ee789b9ac44 Mon Sep 17 00:00:00 2001 From: Kartik Ganesh Date: Tue, 27 Jun 2023 13:48:38 -0700 Subject: [PATCH 53/57] [index configuration tool] Change to parsing Data Prepper pipeline YAML as input (#215) * Removal of Lark parsing Signed-off-by: Kartik Ganesh * [index configuration tool] Change to parsing Data Prepper pipeline YAML as input This change removes support for Logstash input files and the use of the Lark library. Instead, index_configuration_tool now accepts a Data Prepper pipeline YAML file as input. 
Signed-off-by: Kartik Ganesh * [Index configuration tool] Added utility logic to test_constants.py This allows easy updating of the stored pickle output file if/when the test pipeline YAML file is updated. This code is commented out for now since it will only be triggered manually. Signed-off-by: Kartik Ganesh --------- Signed-off-by: Kartik Ganesh --- index_configuration_tool/Dockerfile | 3 +- index_configuration_tool/logstash.lark | 39 ---------- .../logstash_conf_parser.py | 52 ------------- index_configuration_tool/main.py | 63 +++++++++------ index_configuration_tool/requirements.txt | 2 +- .../resources/expected_parse_output.pickle | Bin 415 -> 435 bytes .../tests/resources/logstash_test_input.conf | 40 ---------- .../tests/resources/test_pipeline_input.yaml | 21 +++++ .../tests/test_constants.py | 15 +++- .../tests/test_logstash_parser.py | 73 ------------------ index_configuration_tool/tests/test_main.py | 64 +++++++-------- 11 files changed, 105 insertions(+), 267 deletions(-) delete mode 100644 index_configuration_tool/logstash.lark delete mode 100644 index_configuration_tool/logstash_conf_parser.py delete mode 100644 index_configuration_tool/tests/resources/logstash_test_input.conf create mode 100644 index_configuration_tool/tests/resources/test_pipeline_input.yaml delete mode 100644 index_configuration_tool/tests/test_logstash_parser.py diff --git a/index_configuration_tool/Dockerfile b/index_configuration_tool/Dockerfile index e73e4c246..03abf8581 100644 --- a/index_configuration_tool/Dockerfile +++ b/index_configuration_tool/Dockerfile @@ -10,9 +10,8 @@ FROM python:3.11-slim WORKDIR /code # Copy only required dependencies COPY --from=builder /root/.local /root/.local -# Copy only source code and Logstash grammar +# Copy only source code COPY ./*.py . -COPY logstash.lark . # update PATH ENV PATH=/root/.local:$PATH diff --git a/index_configuration_tool/logstash.lark b/index_configuration_tool/logstash.lark deleted file mode 100644 index ea8096a0e..000000000 --- a/index_configuration_tool/logstash.lark +++ /dev/null @@ -1,39 +0,0 @@ -// Adapted from https://github.com/bpaquet/node-logstash/blob/master/lib/logstash_config.jison -// Tested via https://www.lark-parser.org/ide -// TODO add if/else support -?start: config_section (config_section)* - -?config_section: type "{" plugins? "}" - -plugins: plugin (plugin)* - -plugin: key "{" plugin_params? 
"}" - -plugin_params: param (param)* - -param: key "=>" value - -?value: string_literal - | number - | "true" -> true - | "false" -> false - | list - -?list : "[" [member ("," member)*] "]" -?member: string_literal | number -type: PLUGIN_TYPE -key: STRING -string_literal: ESCAPED_STRING -number: SIGNED_INT -PLUGIN_TYPE: "input" | "filter" | "output" -STRING: (DIGIT|LETTER|"_"|"-"|".")+ -COMMENT: "#"/.*/ - -%import common.ESCAPED_STRING -%import common.SIGNED_INT -%import common.DIGIT -%import common.LETTER -%import common.WS - -%ignore WS -%ignore COMMENT \ No newline at end of file diff --git a/index_configuration_tool/logstash_conf_parser.py b/index_configuration_tool/logstash_conf_parser.py deleted file mode 100644 index 827453c69..000000000 --- a/index_configuration_tool/logstash_conf_parser.py +++ /dev/null @@ -1,52 +0,0 @@ -import sys - -from lark import Lark -from lark import Transformer - - -# The names of each function in the Transformer corresponds -# to -class LogstashTransformer(Transformer): - def var_name(self, v: list) -> str: - (v,) = v - return v.value - - def string_literal(self, s: list) -> str: - s = self.var_name(s) - # Remove surrounding quotes - return s[1:-1] - - def number(self, n: list) -> int: - (n,) = n - return int(n) - - def true(self, b) -> bool: - return True - - def false(self, b) -> bool: - return False - - # The same logic is applied for both rules - key = var_name - type = var_name - # These rules can be transformed directly to a corresponding Python type - start = dict - config_section = tuple - plugin_params = dict - list = list - param = tuple - plugin = tuple - plugins = list - - -logstash_parser = Lark.open("logstash.lark", rel_to=__file__, parser="lalr", transformer=LogstashTransformer()) - - -def parse(logstash_file: str) -> dict: - with open(logstash_file, "r") as conf_file: - return logstash_parser.parse(conf_file.read()) - - -if __name__ == '__main__': # pragma no cover - val = parse(sys.argv[1]) - print(val) diff --git a/index_configuration_tool/main.py b/index_configuration_tool/main.py index 938c3ba5c..944354068 100644 --- a/index_configuration_tool/main.py +++ b/index_configuration_tool/main.py @@ -1,14 +1,16 @@ import argparse +import yaml from typing import Optional import index_operations -import logstash_conf_parser as logstash_parser import utils # Constants SUPPORTED_ENDPOINTS = ["opensearch", "elasticsearch"] +SOURCE_KEY = "source" +SINK_KEY = "sink" HOSTS_KEY = "hosts" -USER_KEY = "user" +USER_KEY = "username" PWD_KEY = "password" @@ -19,10 +21,9 @@ def get_auth(input_data: dict) -> Optional[tuple]: def get_endpoint_info(plugin_config: dict) -> tuple: - endpoint = "https://" if ("ssl" in plugin_config and plugin_config["ssl"]) else "http://" # "hosts" can be a simple string, or an array of hosts for Logstash to hit. # This tool needs one accessible host, so we pick the first entry in the latter case. 
- endpoint += plugin_config[HOSTS_KEY][0] if type(plugin_config[HOSTS_KEY]) is list else plugin_config[HOSTS_KEY] + endpoint = plugin_config[HOSTS_KEY][0] if type(plugin_config[HOSTS_KEY]) is list else plugin_config[HOSTS_KEY] endpoint += "/" return endpoint, get_auth(plugin_config) @@ -32,13 +33,28 @@ def fetch_all_indices_by_plugin(plugin_config: dict) -> dict: return index_operations.fetch_all_indices(endpoint, auth_tuple) +def check_supported_endpoint(config: dict) -> Optional[tuple]: + for supported_type in SUPPORTED_ENDPOINTS: + if supported_type in config: + return supported_type, config[supported_type] + + def get_supported_endpoint(config: dict, key: str) -> tuple: - # The value of each key is a list of plugin configs. - # Each config is a tuple, where the first value is the endpoint type. - supported_endpoint = next((p for p in config[key] if p[0] in SUPPORTED_ENDPOINTS), None) - if not supported_endpoint: + # The value of each key may be a single plugin (as a dict) + # or a list of plugin configs + supported_tuple = tuple() + if type(config[key]) is dict: + supported_tuple = check_supported_endpoint(config[key]) + elif type(config[key]) is list: + for entry in config[key]: + supported_tuple = check_supported_endpoint(entry) + # Break out of the loop at the first supported type + if supported_tuple: + break + if not supported_tuple: raise ValueError("Could not find any supported endpoints in section: " + key) - return supported_endpoint + # First tuple value is the name, second value is the config dict + return supported_tuple[0], supported_tuple[1] def validate_plugin_config(config: dict, key: str): @@ -53,11 +69,11 @@ def validate_plugin_config(config: dict, key: str): raise ValueError("Invalid auth configuration (Password without user) for endpoint: " + supported_endpoint[0]) -def validate_logstash_config(config: dict): - if "input" not in config or "output" not in config: - raise ValueError("Missing input or output data from Logstash config") - validate_plugin_config(config, "input") - validate_plugin_config(config, "output") +def validate_pipeline_config(config: dict): + if SOURCE_KEY not in config or SINK_KEY not in config: + raise ValueError("Missing source or sink configuration in Data Prepper pipeline YAML") + validate_plugin_config(config, SOURCE_KEY) + validate_plugin_config(config, SINK_KEY) # Computes differences in indices between source and target. 
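# Illustrative usage of the lookup helpers above (hypothetical values, not part
# of this patch): "source" holds a single plugin dict, while "sink" holds a list
# of plugin dicts, and get_supported_endpoint() returns a (type, config) tuple
# for either shape.
source_section = {"source": {"elasticsearch": {"hosts": ["http://src:9200"]}}}
sink_section = {"sink": [{"stdout": {}}, {"opensearch": {"hosts": ["https://tgt:443"]}}]}
assert get_supported_endpoint(source_section, "source")[0] == "elasticsearch"
assert get_supported_endpoint(sink_section, "sink")[0] == "opensearch"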
@@ -92,15 +108,18 @@ def print_report(index_differences: tuple[set, set, set]): # pragma no cover def run(config_file_path: str) -> None: - # Parse and validate logstash config file - logstash_config = logstash_parser.parse(config_file_path) - validate_logstash_config(logstash_config) + # Parse and validate pipelines YAML file + with open(config_file_path, 'r') as pipeline_file: + dp_config = yaml.safe_load(pipeline_file) + # We expect the Data Prepper pipeline to only have a single top-level value + pipeline_config = next(iter(dp_config.values())) + validate_pipeline_config(pipeline_config) # Endpoint is a tuple of (type, config) - endpoint = get_supported_endpoint(logstash_config, "input") + endpoint = get_supported_endpoint(pipeline_config, SOURCE_KEY) # Fetch all indices from source cluster source_indices = fetch_all_indices_by_plugin(endpoint[1]) # Fetch all indices from target cluster - endpoint = get_supported_endpoint(logstash_config, "output") + endpoint = get_supported_endpoint(pipeline_config, SINK_KEY) target_endpoint, target_auth = get_endpoint_info(endpoint[1]) target_indices = index_operations.fetch_all_indices(target_endpoint, target_auth) # Compute index differences and print report @@ -119,8 +138,8 @@ def run(config_file_path: str) -> None: arg_parser = argparse.ArgumentParser( prog="python main.py", description="This tool creates indices on a target cluster based on the contents of a source cluster.\n" + - "The source and target endpoints are obtained by parsing a Logstash config file, which is the " + - "sole expected argument for this module.\nAlso prints a report of the indices to be created, " + + "The source and target endpoints are obtained by parsing a Data Prepper pipelines YAML file, which " + + "is the sole expected argument for this module.\nAlso prints a report of the indices to be created, " + "along with indices that are identical or have conflicting settings/mappings.\nIn case of the " + "latter, no action will be taken on the target cluster.", formatter_class=argparse.RawTextHelpFormatter @@ -128,7 +147,7 @@ def run(config_file_path: str) -> None: # This tool only takes one argument arg_parser.add_argument( "config_file_path", - help="Path to the Logstash config file to parse for source and target endpoint information" + help="Path to the Data Prepper pipeline YAML file to parse for source and target endpoint information" ) args = arg_parser.parse_args() print("\n##### Starting index configuration tool... 
#####\n") diff --git a/index_configuration_tool/requirements.txt b/index_configuration_tool/requirements.txt index 51e8144a1..1a7d17eef 100644 --- a/index_configuration_tool/requirements.txt +++ b/index_configuration_tool/requirements.txt @@ -1,4 +1,4 @@ jsondiff>=2.0.0 -lark>=1.1.5 +pyyaml>=6.0 requests>=2.28.2 responses>=0.23.1 \ No newline at end of file diff --git a/index_configuration_tool/tests/resources/expected_parse_output.pickle b/index_configuration_tool/tests/resources/expected_parse_output.pickle index 2bb22976b6d1823463bd1dd6d95831fa0dac77a5..568cdae3ff3a8ddaf0bc005df83500e05e902688 100644 GIT binary patch literal 435 zcma)2K~BRk5G-l}4FYlC137XjRdVDDya82iSP|3Iw!B^-NR@cN9+yAxOx8J2DsjLE zdv<5#nVpZ~^Xp_boxSjN0!+(b0}R%|(z-xN_Hx6g=qYxfm@i>4m~4k&V%JY>*?UYV z4?1|!rxfm1tK4bisB8szK?u>A9SB!hYn^N1MKFl3K0f}2gj|K_J3#bNKUeLuFwm2A z&7@jkifa?287Jkr*>u9?TRqnAs#N*S^MTDf%(pNK7kaC)^2H3KE((1cp<}*1@OiFo zg@Dj-f!1xam|7__&PKR$wEI!IpXnyrJof?IUlH+DP6#z3kL#S0n17^FkLG#7NktDJ;Y+^6 z@%Q7`;*-atTra$$skUtlyc1ZqCLXn2b9!H5@Y){hJLs6s`b+hoOa!L+-Z_&yA5_|S zpW${K-}#4QBOKC#e*Wf53Z;7vMq$tmg7VEN>r77$17`gU*G&1`Fi;T@JkufohPub| zRv|t*e=9BqD8fLqgEm3F?*$;3je*UEd{+&>lw|2zuzzDv>99_Tm?nsZsR%x8?EfT13dku%y!Zys_^G}C diff --git a/index_configuration_tool/tests/resources/logstash_test_input.conf b/index_configuration_tool/tests/resources/logstash_test_input.conf deleted file mode 100644 index 24caef779..000000000 --- a/index_configuration_tool/tests/resources/logstash_test_input.conf +++ /dev/null @@ -1,40 +0,0 @@ -input { - - plugin1 { - string_key => "string value" - bool_key => true - num_key => 1 - } - - plugin2 { - neg_key => -1 - } - - elasticsearch { - hosts => ["host1", "host2"] - user => "test_user" - password => "password" - } -} - -filter { - plugin3 { - str_array => ["abc", "x y z"] - } -} - -output { - plugin4 {} - - plugin5 { - num_array => [0] - } - - opensearch { - hosts => "os_host" - ssl => "true" - user => "test_user" - password => "test" - } -} - diff --git a/index_configuration_tool/tests/resources/test_pipeline_input.yaml b/index_configuration_tool/tests/resources/test_pipeline_input.yaml new file mode 100644 index 000000000..7c33a7e21 --- /dev/null +++ b/index_configuration_tool/tests/resources/test_pipeline_input.yaml @@ -0,0 +1,21 @@ +test-pipeline-input: + source: + elasticsearch: + hosts: ["http://host1", "http://host2"] + username: "test_user" + password: "password" + processor: + - plugin1: + str_array: ["abc", "x y z"] + obj_array: + - key: "key1" + value: "val1" + - key: "key2" + value: "val 2" + sink: + - sink1: + num_array: [0] + - opensearch: + hosts: ["https://os_host"] + username: "test_user" + password: "test" diff --git a/index_configuration_tool/tests/test_constants.py b/index_configuration_tool/tests/test_constants.py index a6325423a..c4cd4d51f 100644 --- a/index_configuration_tool/tests/test_constants.py +++ b/index_configuration_tool/tests/test_constants.py @@ -2,8 +2,8 @@ from os.path import dirname TEST_RESOURCES_SUBPATH = "/resources/" -LOGSTASH_RAW_FILE_PATH = dirname(__file__) + TEST_RESOURCES_SUBPATH + "logstash_test_input.conf" -LOGSTASH_PICKLE_FILE_PATH = dirname(__file__) + TEST_RESOURCES_SUBPATH + "expected_parse_output.pickle" +PIPELINE_CONFIG_RAW_FILE_PATH = dirname(__file__) + TEST_RESOURCES_SUBPATH + "test_pipeline_input.yaml" +PIPELINE_CONFIG_PICKLE_FILE_PATH = dirname(__file__) + TEST_RESOURCES_SUBPATH + "expected_parse_output.pickle" INDEX1_NAME = "index1" INDEX2_NAME = "index2" @@ -51,6 +51,15 @@ } } } -# Based on the contents of logstash_test_input.conf +# Based on the 
contents of test_pipeline_input.yaml SOURCE_ENDPOINT = "http://host1/" TARGET_ENDPOINT = "https://os_host/" + +# Utility logic to update the pickle file if/when the input file is updated +# import yaml +# import pickle +# if __name__ == '__main__': +# with open(PIPELINE_CONFIG_RAW_FILE_PATH, 'r') as test_input: +# test_config = yaml.safe_load(test_input) +# with open(PIPELINE_CONFIG_PICKLE_FILE_PATH, 'wb') as out: +# pickle.dump(test_config, out) diff --git a/index_configuration_tool/tests/test_logstash_parser.py b/index_configuration_tool/tests/test_logstash_parser.py deleted file mode 100644 index 073f0c5c8..000000000 --- a/index_configuration_tool/tests/test_logstash_parser.py +++ /dev/null @@ -1,73 +0,0 @@ -import pickle -import unittest - -import lark.exceptions -from jsondiff import diff - -from logstash_conf_parser import logstash_parser, parse -from tests import test_constants - - -class TestLogstashParser(unittest.TestCase): - # Run before each test - def setUp(self) -> None: - with open(test_constants.LOGSTASH_PICKLE_FILE_PATH, "rb") as f: - # The root DS is a dict, with input type as key. - # The value of each key is an array of inputs. - # Each input is a tuple of plugin name and data, - # where the data is a dict of key-value pairs. - self.test_data = pickle.load(f) - - # Test input json should match loaded pickle data - def test_parser_happy_case(self): - actual = parse(test_constants.LOGSTASH_RAW_FILE_PATH) - test_diff = diff(self.test_data, actual) - # Validate that diff is empty - self.assertEqual(test_diff, dict()) - - def test_bad_configs(self): - # Checks for: - # - Empty config - # - Section should begin with type name - # - Invalid type - # - Valid type but no params - bad_configs = ["", "{}", "bad {}", "input"] - for config in bad_configs: - self.assertRaises(lark.exceptions.UnexpectedToken, logstash_parser.parse, config) - - # Note that while these are considered valid Logstash configurations, - # main.py considers them incomplete and would fail when validating them. - def test_empty_config_can_be_parsed(self): - logstash_parser.parse("input {}") - logstash_parser.parse("filter {}") - logstash_parser.parse("output {}") - - def test_string(self): - val = self.test_data["input"][0][1]["string_key"] - self.assertEqual(str, type(val)) - self.assertTrue(len(val) > 0) - - def test_bool(self): - val = self.test_data["input"][0][1]["bool_key"] - self.assertEqual(bool, type(val)) - self.assertTrue(val) - - def test_num(self): - num = self.test_data["input"][0][1]["num_key"] - neg_num = self.test_data["input"][1][1]["neg_key"] - self.assertEqual(int, type(num)) - self.assertEqual(1, num) - self.assertEqual(int, type(neg_num)) - self.assertEqual(-1, neg_num) - - -# Utility method to update the expected output pickle -# file if/when the input conf file is changed. 
-def __update_output_pickle(): - with open(test_constants.LOGSTASH_PICKLE_FILE_PATH, "wb") as out: - val = parse(test_constants.LOGSTASH_RAW_FILE_PATH) - pickle.dump(val, out) - - -if __name__ == '__main__': - unittest.main() diff --git a/index_configuration_tool/tests/test_main.py b/index_configuration_tool/tests/test_main.py index e4374615e..cde419328 100644 --- a/index_configuration_tool/tests/test_main.py +++ b/index_configuration_tool/tests/test_main.py @@ -11,24 +11,18 @@ # Constants TEST_KEY = "test_key" BASE_CONFIG_SECTION = { - TEST_KEY: [("invalid_plugin1", None), ("invalid_plugin2", {})] + TEST_KEY: [{"invalid_plugin1": {"key": "val"}}, {"invalid_plugin2": {}}] } # Utility method to create a test plugin config def create_plugin_config(host_list: list[str], - ssl: Optional[bool] = None, user: Optional[str] = None, password: Optional[str] = None) -> dict: config = dict() - if len(host_list) == 1: - config["hosts"] = host_list[0] - else: - config["hosts"] = host_list - if ssl: - config["ssl"] = ssl + config["hosts"] = host_list if user: - config["user"] = user + config["username"] = user if password: config["password"] = password return config @@ -36,7 +30,8 @@ def create_plugin_config(host_list: list[str], # Utility method to creat a test config section def create_config_section(plugin_config: dict) -> dict: - valid_plugin = (random.choice(main.SUPPORTED_ENDPOINTS), plugin_config) + valid_plugin = dict() + valid_plugin[random.choice(main.SUPPORTED_ENDPOINTS)] = plugin_config config_section = copy.deepcopy(BASE_CONFIG_SECTION) config_section[TEST_KEY].append(valid_plugin) return config_section @@ -45,53 +40,50 @@ def create_config_section(plugin_config: dict) -> dict: class TestMain(unittest.TestCase): # Run before each test def setUp(self) -> None: - with open(test_constants.LOGSTASH_PICKLE_FILE_PATH, "rb") as f: - self.loaded_logstash_config = pickle.load(f) + with open(test_constants.PIPELINE_CONFIG_PICKLE_FILE_PATH, "rb") as f: + self.loaded_pipeline_config = pickle.load(f) def test_get_auth_returns_none(self): # The following inputs should not return an auth tuple: # - Empty input # - user without password # - password without user - input_list = [{}, {"user": "test"}, {"password": "test"}] + input_list = [{}, {"username": "test"}, {"password": "test"}] for test_input in input_list: self.assertIsNone(main.get_auth(test_input)) def test_get_auth_for_valid_input(self): # Test valid input - result = main.get_auth({"user": "user", "password": "pass"}) + result = main.get_auth({"username": "user", "password": "pass"}) self.assertEqual(tuple, type(result)) self.assertEqual("user", result[0]) self.assertEqual("pass", result[1]) def test_get_endpoint_info(self): + host_input = "test" + expected_endpoint = "test/" test_user = "user" test_password = "password" # Simple base case - test_config = create_plugin_config(["test"]) + test_config = create_plugin_config([host_input]) result = main.get_endpoint_info(test_config) - self.assertEqual("http://test/", result[0]) + self.assertEqual(expected_endpoint, result[0]) self.assertIsNone(result[1]) - # SSL enabled - test_config = create_plugin_config(["test"], True) + # Invalid auth config + test_config = create_plugin_config([host_input], test_user) result = main.get_endpoint_info(test_config) - self.assertEqual("https://test/", result[0]) + self.assertEqual(expected_endpoint, result[0]) self.assertIsNone(result[1]) - # SSL disabled, invalid auth config - test_config = create_plugin_config(["test"], False, test_user) + # Valid auth config + 
test_config = create_plugin_config([host_input], user=test_user, password=test_password) result = main.get_endpoint_info(test_config) - self.assertEqual("http://test/", result[0]) - self.assertIsNone(result[1]) - # SSL disabled, valid auth config - test_config = create_plugin_config(["test"], user=test_user, password=test_password) - result = main.get_endpoint_info(test_config) - self.assertEqual("http://test/", result[0]) + self.assertEqual(expected_endpoint, result[0]) self.assertEqual(test_user, result[1][0]) self.assertEqual(test_password, result[1][1]) # Array of hosts uses the first entry - test_config = create_plugin_config(["test1", "test2"], True, test_user, test_password) + test_config = create_plugin_config([host_input, "other_host"], test_user, test_password) result = main.get_endpoint_info(test_config) - self.assertEqual("https://test1/", result[0]) + self.assertEqual(expected_endpoint, result[0]) self.assertEqual(test_user, result[1][0]) self.assertEqual(test_password, result[1][1]) @@ -184,22 +176,24 @@ def test_validate_plugin_config_bad_auth_user(self): self.assertRaises(ValueError, main.validate_plugin_config, test_data, TEST_KEY) def test_validate_plugin_config_happy_case(self): - plugin_config = create_plugin_config(["host"], True, "user", "password") + plugin_config = create_plugin_config(["host"], "user", "password") test_data = create_config_section(plugin_config) # Should complete without errors main.validate_plugin_config(test_data, TEST_KEY) - def test_validate_logstash_config_missing_required_keys(self): + def test_validate_pipeline_config_missing_required_keys(self): # Test cases: # - Empty input # - missing output # - missing input bad_configs = [{}, {"input": ()}, {"output": ()}] for config in bad_configs: - self.assertRaises(ValueError, main.validate_logstash_config, config) + self.assertRaises(ValueError, main.validate_pipeline_config, config) - def test_validate_logstash_config_happy_case(self): - main.validate_logstash_config(self.loaded_logstash_config) + def test_validate_pipeline_config_happy_case(self): + # Get top level value + test_config = next(iter(self.loaded_pipeline_config.values())) + main.validate_pipeline_config(test_config) @patch('main.print_report') @patch('index_operations.create_indices') @@ -222,7 +216,7 @@ def test_run(self, mock_fetch_indices: MagicMock, mock_create_indices: MagicMock index_settings[test_constants.INDEX_KEY][test_constants.NUM_REPLICAS_SETTING] += 1 # Fetch indices is called first for source, then for target mock_fetch_indices.side_effect = [test_constants.BASE_INDICES_DATA, target_indices_data] - main.run(test_constants.LOGSTASH_RAW_FILE_PATH) + main.run(test_constants.PIPELINE_CONFIG_RAW_FILE_PATH) mock_create_indices.assert_called_once_with(expected_create_payload, test_constants.TARGET_ENDPOINT, ANY) mock_print_report.assert_called_once_with(expected_diff) From 98815a925bb9d3b18e376638ee817eb34c2c5b29 Mon Sep 17 00:00:00 2001 From: Greg Schohn Date: Tue, 27 Jun 2023 16:49:47 -0400 Subject: [PATCH 54/57] Make getOrCreateNodeMap() less pessimistic and add a TODO for work that needs to be done to remove (or at least describe) an obscure and rare race condition. 
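The change can be sketched outside of Java as a read-first, create-on-miss lookup. The Python analogue below is purely illustrative (the names are not from the codebase); it shows the shape of the new getOrCreateNodeMap() logic, where the relatively expensive construction only happens when the optimistic read misses:

def get_or_create(shared_map, key, factory):
    # Optimistic read: in the common case the entry already exists and nothing
    # new has to be allocated.
    value = shared_map.get(key)
    if value is not None:
        return value
    # Miss: build a candidate and race to publish it. setdefault() keeps the
    # first value stored for the key, mirroring ConcurrentHashMap.putIfAbsent(),
    # so whichever caller loses the race hands back the winner's value.
    candidate = factory()
    return shared_map.setdefault(key, candidate)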
Signed-off-by: Greg Schohn --- .../replay/ExpiringTrafficStreamMap.java | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMap.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMap.java index f9a851320..4f89e3121 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMap.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMap.java @@ -22,6 +22,11 @@ * This doesn't use more typical out-of-the-box LRU mechanisms. Our requirements are a little bit different. * First, we're fine buffering a variable number of items and secondly, this should be threadsafe an able to * be used in highly concurrent contexts. + * + * TODO - there will be a race condition in the ExpiringTrafficStream maps/sets where items + * could be expunged from the collections while they're still in use. Adding refCounts to + * the collection items that can be checked atomically before purging would mitigate this + * situation */ @Slf4j public class ExpiringTrafficStreamMap { @@ -243,9 +248,15 @@ public ExpiringTrafficStreamMap(Duration minimumGuaranteedLifetime, } private ExpiringKeyQueue getOrCreateNodeMap(String partitionId, EpochMillis timestamp) { - var newMap = new ExpiringKeyQueue(partitionId, timestamp); - var priorMap = nodeToExpiringBucketMap.putIfAbsent(partitionId, newMap); - return priorMap == null ? newMap : priorMap; + // optimistic get - if it's already there, proceed with it. + var ekq = nodeToExpiringBucketMap.get(partitionId); + if (ekq != null) { + return ekq; + } else { + var newMap = new ExpiringKeyQueue(partitionId, timestamp); + var priorMap = nodeToExpiringBucketMap.putIfAbsent(partitionId, newMap); + return priorMap == null ? newMap : priorMap; + } } /** From bf317911693df8469316c50abab365cf3aa85324 Mon Sep 17 00:00:00 2001 From: Greg Schohn Date: Wed, 28 Jun 2023 15:52:06 -0400 Subject: [PATCH 55/57] Move ExpiringTrafficStreamMap file to a new package to accommodate refactoring in the next commit. 
Signed-off-by: Greg Schohn --- .../{ => traffic/expiration}/ExpiringTrafficStreamMap.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) rename TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/{ => traffic/expiration}/ExpiringTrafficStreamMap.java (99%) diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMap.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringTrafficStreamMap.java similarity index 99% rename from TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMap.java rename to TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringTrafficStreamMap.java index 4f89e3121..e954fd012 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMap.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringTrafficStreamMap.java @@ -1,8 +1,9 @@ -package org.opensearch.migrations.replay; +package org.opensearch.migrations.replay.traffic.expiration; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; +import org.opensearch.migrations.replay.Accumulation; import java.time.Duration; import java.time.Instant; From 67effcffceec72a58d4f47549f6b3e016c6b1070 Mon Sep 17 00:00:00 2001 From: Greg Schohn Date: Wed, 28 Jun 2023 15:54:45 -0400 Subject: [PATCH 56/57] Refactor inner classes from ExpiringTrafficStreamMap into separate classes. One other refactorings to simplify data encapsulation between the main map itself and each ExpiringKeyQueue. Signed-off-by: Greg Schohn --- .../migrations/replay/Accumulation.java | 2 +- ...edTrafficToHttpTransactionAccumulator.java | 4 +- .../traffic/expiration/AccumulatorMap.java | 8 + .../traffic/expiration/BehavioralPolicy.java | 65 +++++ .../traffic/expiration/EpochMillis.java | 41 ++++ .../traffic/expiration/ExpiringKeyQueue.java | 114 +++++++++ .../expiration/ExpiringTrafficStreamMap.java | 223 ++---------------- .../expiration/ScopedConnectionIdKey.java | 11 + ...xpiringTrafficStreamMapSequentialTest.java | 4 +- ...ExpiringTrafficStreamMapUnorderedTest.java | 4 +- 10 files changed, 267 insertions(+), 209 deletions(-) create mode 100644 TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/AccumulatorMap.java create mode 100644 TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/BehavioralPolicy.java create mode 100644 TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/EpochMillis.java create mode 100644 TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringKeyQueue.java create mode 100644 TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ScopedConnectionIdKey.java diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/Accumulation.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/Accumulation.java index 3d6bcfae2..14fdd3b6f 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/Accumulation.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/Accumulation.java @@ -6,7 +6,7 @@ public class 
Accumulation { RequestResponsePacketPair rrPair = new RequestResponsePacketPair(); - final AtomicLong newestPacketTimestampInMillis; + public final AtomicLong newestPacketTimestampInMillis; State state = State.NOTHING_SENT; public Accumulation() { diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/CapturedTrafficToHttpTransactionAccumulator.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/CapturedTrafficToHttpTransactionAccumulator.java index 31ccf7772..24c7232b2 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/CapturedTrafficToHttpTransactionAccumulator.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/CapturedTrafficToHttpTransactionAccumulator.java @@ -1,6 +1,8 @@ package org.opensearch.migrations.replay; import lombok.extern.slf4j.Slf4j; +import org.opensearch.migrations.replay.traffic.expiration.BehavioralPolicy; +import org.opensearch.migrations.replay.traffic.expiration.ExpiringTrafficStreamMap; import org.opensearch.migrations.trafficcapture.protos.TrafficObservation; import java.time.Duration; @@ -52,7 +54,7 @@ public CapturedTrafficToHttpTransactionAccumulator(Duration minTimeout, Consumer requestReceivedHandler, Consumer fullDataHandler) { liveStreams = new ExpiringTrafficStreamMap(minTimeout, EXPIRATION_GRANULARITY, - new ExpiringTrafficStreamMap.BehavioralPolicy() { + new BehavioralPolicy() { @Override public void onExpireAccumulation(String partitionId, String connectionId, diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/AccumulatorMap.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/AccumulatorMap.java new file mode 100644 index 000000000..37a9bf56b --- /dev/null +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/AccumulatorMap.java @@ -0,0 +1,8 @@ +package org.opensearch.migrations.replay.traffic.expiration; + +import org.opensearch.migrations.replay.Accumulation; + +import java.util.concurrent.ConcurrentHashMap; + +class AccumulatorMap extends ConcurrentHashMap { +} diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/BehavioralPolicy.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/BehavioralPolicy.java new file mode 100644 index 000000000..2df172149 --- /dev/null +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/BehavioralPolicy.java @@ -0,0 +1,65 @@ +package org.opensearch.migrations.replay.traffic.expiration; + +import lombok.extern.slf4j.Slf4j; +import org.opensearch.migrations.replay.Accumulation; + +import java.time.Duration; +import java.time.Instant; + +/** + * I should look up what this is called in the Gang of Four book. + * In my mind, this is a metaprogramming policy mixin. + */ +@Slf4j +public class BehavioralPolicy { + public void onDataArrivingBeforeTheStartOfTheCurrentProcessingWindow( + String partitionId, String connectionId, Instant timestamp, Instant endOfWindow) { + log.error("Could not update the expiration of an object whose timestamp is before the " + + "oldest point in time that packets are still being processed for this partition. " + + "This means that there was larger than expected temporal jitter in packets. 
" + + "That means that traffic for this connection may have already been reported as expired " + + "and processed as such and that this data will not be properly handled due to other data " + + "within the connection being prematurely expired. Trying to send the data through a new " + + "instance of this object with a minimumGuaranteedLifetime of " + + Duration.between(timestamp, endOfWindow) + " will allow for this packet to be properly " + + "accumulated for (" + partitionId + "," + connectionId + ")"); + } + + public void onNewDataArrivingAfterItsAccumulationHasBeenExpired( + String partitionId, String connectionId, Instant lastPacketTimestamp, Instant endOfWindow) { + log.error("New data has arrived, but during the processing of this Accumulation object, " + + "the Accumulation was expired. This indicates that the minimumGuaranteedLifetime " + + "must be set to at least " + Duration.between(lastPacketTimestamp, endOfWindow) + + ". The beginning of the valid time window is currently " + endOfWindow + + " for (" + partitionId + "," + connectionId + ") and the last timestamp of the " + + "Accumulation object that was being assembled was"); + } + + public void onNewDataArrivingWithATimestampThatIsAlreadyExpired( + String partitionId, String connectionId, Instant timestamp, Instant endOfWindow) { + log.error("New data has arrived, but during the processing of this Accumulation object, " + + "the Accumulation was expired. This indicates that the minimumGuaranteedLifetime " + + "must be set to at least " + Duration.between(timestamp, endOfWindow) + + ". The beginning of the valid time window is currently " + endOfWindow + + " for (" + partitionId + "," + connectionId + ") and the last timestamp of the " + + "Accumulation object that was being assembled was"); + } + + public boolean shouldRetryAfterAccumulationTimestampRaceDetected(String partitionId, String connectionId, + Instant timestamp, Accumulation accumulation, + int attempts) { + if (attempts > ExpiringTrafficStreamMap.DEFAULT_NUM_TIMESTAMP_UPDATE_ATTEMPTS) { + log.error("A race condition was detected while trying to update the most recent timestamp " + + "(" + timestamp + ") of " + "accumulation (" + accumulation + ") for " + + partitionId + "/" + connectionId + ". Giving up after " + attempts + " attempts. 
" + + "Data for this connection may be corrupted."); + return false; + } else { + return true; + } + } + + public void onExpireAccumulation(String partitionId, String connectionId, Accumulation accumulation) { + // do nothing by default + } +} diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/EpochMillis.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/EpochMillis.java new file mode 100644 index 000000000..e72746916 --- /dev/null +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/EpochMillis.java @@ -0,0 +1,41 @@ +package org.opensearch.migrations.replay.traffic.expiration; + +import lombok.EqualsAndHashCode; + +import java.time.Instant; +import java.util.function.BiPredicate; + +@EqualsAndHashCode +class EpochMillis implements Comparable { + final long millis; + + public EpochMillis(Instant i) { + millis = i.toEpochMilli(); + } + + public EpochMillis(long ms) { + this.millis = ms; + } + + public boolean test(EpochMillis referenceTimestamp, BiPredicate c) { + return c.test(this.millis, referenceTimestamp.millis); + } + + public boolean test(Instant referenceTimestamp, BiPredicate c) { + return c.test(this.millis, referenceTimestamp.toEpochMilli()); + } + + public Instant toInstant() { + return Instant.ofEpochMilli(millis); + } + + @Override + public String toString() { + return Long.toString(millis); + } + + @Override + public int compareTo(EpochMillis o) { + return Long.valueOf(this.millis).compareTo(o.millis); + } +} diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringKeyQueue.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringKeyQueue.java new file mode 100644 index 000000000..b2ce1c43c --- /dev/null +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringKeyQueue.java @@ -0,0 +1,114 @@ +package org.opensearch.migrations.replay.traffic.expiration; + +import lombok.extern.slf4j.Slf4j; + +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentSkipListMap; + +/** + * This is a sequence of (concurrent) hashmaps segmented by time. Each element in the sequence is + * composed of a timestamp and a map. The timestamp at each element is guaranteed to be greater + * than all items within all maps that preceded it. + */ +@Slf4j +class ExpiringKeyQueue extends + ConcurrentSkipListMap> { + private final Duration granularity; + private final String partitionId; + + ExpiringKeyQueue(Duration granularity, String partitionId, EpochMillis startingTimestamp) { + this.granularity = granularity; + this.partitionId = partitionId; + addNewSet(startingTimestamp); + } + + public Instant getLatestPossibleKeyValue() { + return lastKey().toInstant().plus(granularity); + } + + private ConcurrentHashMap addNewSet(EpochMillis timestampMillis) { + var accumulatorMap = new ConcurrentHashMap(); + this.put(timestampMillis, accumulatorMap); + return accumulatorMap; + } + + /** + * Returns null if the requested timestamp is in the expired range of timestamps, + * otherwise this returns the appropriate bucket. It either finds it within the map + * or creates a new one and inserts it into the map (atomically). 
+ * + * @param timestamp + * @return + */ + ConcurrentHashMap getHashSetForTimestamp(EpochMillis timestamp, + Runnable onNewBucketCreated) { + return Optional.ofNullable(this.floorEntry(timestamp)) + .map(kvp -> { + var shiftedKey = kvp.getKey().toInstant().plus(granularity); + if (timestamp.test(shiftedKey, (newTimestamp, computedFloor) -> newTimestamp >= computedFloor)) { + try { + return createNewSlot(timestamp, kvp.getKey()); + } finally { + onNewBucketCreated.run(); + } + } + return kvp.getValue(); + }) + .orElse(null); // floorEntry could be null if the entry was too old + } + + /** + * We don't want to have a race condition where many nearby keys could be created. This could happen + * if many requests come in with slightly different timestamps, but were being processed before the + * new bucket was created. Since we're dealing with a map, the simplest way around this is to reduce, + * or quantize, the range of potential keys so that each key will uniquely identify an entire range. + * It should be impossible for two keys to have any overlap over the granularity window. + *

+ * That allows us to put a new entry ONLY IF it isn't already there, which results in a uniqueness + * invariant that makes a lot of other things easier to reason with. + */ + private ConcurrentHashMap createNewSlot(EpochMillis timestamp, EpochMillis referenceKey) { + var granularityMs = granularity.toMillis(); + var quantizedDifference = (timestamp.millis - referenceKey.millis) / granularityMs; + var newKey = referenceKey.millis + (quantizedDifference * granularityMs); + var newMap = new ConcurrentHashMap(); + var priorMap = putIfAbsent(new EpochMillis(newKey), newMap); + return priorMap == null ? newMap : priorMap; + } + + void expireOldSlots(AccumulatorMap connectionAccumulatorMap, + BehavioralPolicy behavioralPolicy, + Duration minimumGuaranteedLifetime, + EpochMillis largestCurrentObservedTimestamp) { + var startOfWindow = + new EpochMillis(largestCurrentObservedTimestamp.toInstant().minus(minimumGuaranteedLifetime)); + for (var kvp = firstEntry(); + kvp.getKey().test(startOfWindow, (first, windowStart) -> first < windowStart); + kvp = firstEntry()) { + expireItemsBefore(connectionAccumulatorMap, behavioralPolicy, kvp.getValue(), startOfWindow); + remove(kvp.getKey()); + } + } + + private void expireItemsBefore(AccumulatorMap connectionAccumulatorMap, + BehavioralPolicy behavioralPolicy, + ConcurrentHashMap keyMap, + EpochMillis earlierTimesToPreserve) { + log.debug("Expiring entries before " + earlierTimesToPreserve); + for (var connectionId : keyMap.keySet()) { + var key = new ScopedConnectionIdKey(partitionId, connectionId); + var accumulation = connectionAccumulatorMap.get(key); + if (accumulation != null && + accumulation.newestPacketTimestampInMillis.get() < earlierTimesToPreserve.millis) { + var priorValue = connectionAccumulatorMap.remove(key); + if (priorValue != null) { + priorValue.newestPacketTimestampInMillis.set(ExpiringTrafficStreamMap.ACCUMULATION_DEAD_SENTINEL); + behavioralPolicy.onExpireAccumulation(partitionId, connectionId, accumulation); + } + } + } + } +} diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringTrafficStreamMap.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringTrafficStreamMap.java index e954fd012..47ddb2406 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringTrafficStreamMap.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ExpiringTrafficStreamMap.java @@ -1,16 +1,11 @@ package org.opensearch.migrations.replay.traffic.expiration; -import lombok.AllArgsConstructor; -import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.opensearch.migrations.replay.Accumulation; import java.time.Duration; import java.time.Instant; -import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.function.BiPredicate; import java.util.stream.Stream; /** @@ -36,198 +31,6 @@ public class ExpiringTrafficStreamMap { public static final int ACCUMULATION_DEAD_SENTINEL = Integer.MAX_VALUE; public static final int ACCUMULATION_TIMESTAMP_NOT_SET_YET_SENTINEL = 0; - @EqualsAndHashCode - public static class EpochMillis implements Comparable { - final long millis; - public EpochMillis(Instant i) { - millis = i.toEpochMilli(); - } - public EpochMillis(long ms) { - this.millis = ms; - } - - public boolean test(EpochMillis 
referenceTimestamp, BiPredicate c) { - return c.test(this.millis, referenceTimestamp.millis); - } - - public boolean test(Instant referenceTimestamp, BiPredicate c) { - return c.test(this.millis, referenceTimestamp.toEpochMilli()); - } - - public Instant toInstant() { - return Instant.ofEpochMilli(millis); - } - @Override - public String toString() { return Long.toString(millis); } - - @Override - public int compareTo(EpochMillis o) { - return Long.valueOf(this.millis).compareTo(o.millis); - } - } - - /** - * I should look up what this is called in the Gang of Four book. - * In my mind, this is a metaprogramming policy mixin. - */ - public static class BehavioralPolicy { - public void onDataArrivingBeforeTheStartOfTheCurrentProcessingWindow( - String partitionId, String connectionId, Instant timestamp, Instant endOfWindow) { - log.error("Could not update the expiration of an object whose timestamp is before the " + - "oldest point in time that packets are still being processed for this partition. " + - "This means that there was larger than expected temporal jitter in packets. " + - "That means that traffic for this connection may have already been reported as expired " + - "and processed as such and that this data will not be properly handled due to other data " + - "within the connection being prematurely expired. Trying to send the data through a new " + - "instance of this object with a minimumGuaranteedLifetime of " + - Duration.between(timestamp, endOfWindow) + " will allow for this packet to be properly " + - "accumulated for (" + partitionId + "," + connectionId + ")"); - } - - public void onNewDataArrivingAfterItsAccumulationHasBeenExpired( - String partitionId, String connectionId, Instant lastPacketTimestamp, Instant endOfWindow) { - log.error("New data has arrived, but during the processing of this Accumulation object, " + - "the Accumulation was expired. This indicates that the minimumGuaranteedLifetime " + - "must be set to at least " + Duration.between(lastPacketTimestamp, endOfWindow) + - ". The beginning of the valid time window is currently " + endOfWindow + - " for (" + partitionId + "," + connectionId + ") and the last timestamp of the " + - "Accumulation object that was being assembled was"); - } - - public void onNewDataArrivingWithATimestampThatIsAlreadyExpired( - String partitionId, String connectionId, Instant timestamp, Instant endOfWindow) { - log.error("New data has arrived, but during the processing of this Accumulation object, " + - "the Accumulation was expired. This indicates that the minimumGuaranteedLifetime " + - "must be set to at least " + Duration.between(timestamp, endOfWindow) + - ". The beginning of the valid time window is currently " + endOfWindow + - " for (" + partitionId + "," + connectionId + ") and the last timestamp of the " + - "Accumulation object that was being assembled was"); - } - - public boolean shouldRetryAfterAccumulationTimestampRaceDetected(String partitionId, String connectionId, - Instant timestamp, Accumulation accumulation, - int attempts) { - if (attempts > DEFAULT_NUM_TIMESTAMP_UPDATE_ATTEMPTS) { - log.error("A race condition was detected while trying to update the most recent timestamp " + - "(" + timestamp + ") of " + "accumulation (" + accumulation + ") for " + - partitionId + "/" + connectionId + ". Giving up after " + attempts + " attempts. 
" + - "Data for this connection may be corrupted."); - return false; - } else { - return true; - } - } - - public void onExpireAccumulation(String partitionId, String connectionId, Accumulation accumulation) { - // do nothing by default - } - } - - @AllArgsConstructor - @EqualsAndHashCode - private static class ScopedConnectionIdKey { - public final String nodeId; - public final String connectionId; - } - - private static class AccumulatorMap extends ConcurrentHashMap {} - - /** - * This is a sequence of (concurrent) hashmaps segmented by time. Each element in the sequence is - * composed of a timestamp and a map. The timestamp at each element is guaranteed to be greater - * than all items within all maps that preceded it. - * - * Notice that this class DOES use some values from the surrounding class (granularity) - */ - private class ExpiringKeyQueue extends - ConcurrentSkipListMap> { - private final String partitionId; - - ExpiringKeyQueue(String partitionId, EpochMillis startingTimestamp) { - this.partitionId = partitionId; - addNewSet(startingTimestamp); - } - - public Instant getLatestPossibleKeyValue() { - return lastKey().toInstant().plus(granularity); - } - - private ConcurrentHashMap addNewSet(EpochMillis timestampMillis) { - var accumulatorMap = new ConcurrentHashMap(); - this.put(timestampMillis, accumulatorMap); - return accumulatorMap; - } - - /** - * Returns null if the requested timestamp is in the expired range of timestamps, - * otherwise this returns the appropriate bucket. It either finds it within the map - * or creates a new one and inserts it into the map (atomically). - * @param timestamp - * @return - */ - private ConcurrentHashMap getHashSetForTimestamp(EpochMillis timestamp) { - return Optional.ofNullable(this.floorEntry(timestamp)) - .map(kvp-> { - var shiftedKey = kvp.getKey().toInstant().plus(granularity); - if (timestamp.test(shiftedKey, (boundary, ref) -> boundary>=ref)) { - try { - return createNewSlot(timestamp, kvp.getKey()); - } finally { - expireOldSlots(timestamp); - } - } - return kvp.getValue(); - }) - .orElse(null); - } - - /** - * We don't want to have a race condition where many nearby keys could be created. This could happen - * if many requests come in with slightly different timestamps, but were being processed before the - * new bucket was created. Since we're dealing with a map, the simplest way around this is to reduce, - * or quantize, the range of potential keys so that each key will uniquely identify an entire range. - * It should be impossible for two keys to have any overlap over the granularity window. - * - * That allows us to put a new entry ONLY IF it isn't already there, which results in a uniqueness - * invariant that makes a lot of other things easier to reason with. - */ - private ConcurrentHashMap createNewSlot(EpochMillis timestamp, EpochMillis referenceKey) { - var granularityMs = granularity.toMillis(); - var quantizedDifference = (timestamp.millis - referenceKey.millis) / granularityMs; - var newKey = referenceKey.millis + (quantizedDifference * granularityMs); - var newMap = new ConcurrentHashMap(); - var priorMap = putIfAbsent(new EpochMillis(newKey), newMap); - return priorMap == null ? 
newMap : priorMap; - } - - private void expireOldSlots(EpochMillis largestCurrentObservedTimestamp) { - var startOfWindow = - new EpochMillis(largestCurrentObservedTimestamp.toInstant().minus(minimumGuaranteedLifetime)); - for (var kvp = firstEntry(); - kvp.getKey().test(startOfWindow, (first, windowStart)->first keyMap, EpochMillis earlierTimesToPreserve) { - log.debug("Expiring entries before " + earlierTimesToPreserve); - for (var connectionId : keyMap.keySet()) { - var key = new ScopedConnectionIdKey(partitionId, connectionId); - var accumulation = connectionAccumulationMap.get(key); - if (accumulation != null && - accumulation.newestPacketTimestampInMillis.get() < earlierTimesToPreserve.millis) { - var priorValue = connectionAccumulationMap.remove(key); - if (priorValue != null) { - priorValue.newestPacketTimestampInMillis.set(ACCUMULATION_DEAD_SENTINEL); - behavioralPolicy.onExpireAccumulation(partitionId, connectionId, accumulation); - } - } - } - } - } - protected final AccumulatorMap connectionAccumulationMap; protected final ConcurrentHashMap nodeToExpiringBucketMap; protected final Duration minimumGuaranteedLifetime; @@ -254,7 +57,7 @@ private ExpiringKeyQueue getOrCreateNodeMap(String partitionId, EpochMillis time if (ekq != null) { return ekq; } else { - var newMap = new ExpiringKeyQueue(partitionId, timestamp); + var newMap = new ExpiringKeyQueue(this.granularity, partitionId, timestamp); var priorMap = nodeToExpiringBucketMap.putIfAbsent(partitionId, newMap); return priorMap == null ? newMap : priorMap; } @@ -297,7 +100,8 @@ private boolean updateExpirationTrackers(String partitionId, String connectionId } } - var targetBucketHashSet = expiringQueue.getHashSetForTimestamp(timestampMillis); + var targetBucketHashSet = getHashSetForTimestampWhileExpiringOldBuckets(expiringQueue, timestampMillis); + if (targetBucketHashSet == null) { var startOfWindow = expiringQueue.firstKey().toInstant(); assert !timestampMillis.test(startOfWindow, (ts, windowStart) -> ts < windowStart) : @@ -308,7 +112,7 @@ private boolean updateExpirationTrackers(String partitionId, String connectionId return false; } if (lastPacketTimestamp.millis > ACCUMULATION_TIMESTAMP_NOT_SET_YET_SENTINEL) { - var sourceBucket = expiringQueue.getHashSetForTimestamp(lastPacketTimestamp); + var sourceBucket = getHashSetForTimestampWhileExpiringOldBuckets(expiringQueue, lastPacketTimestamp); if (sourceBucket != targetBucketHashSet) { if (sourceBucket == null) { behavioralPolicy.onNewDataArrivingAfterItsAccumulationHasBeenExpired(partitionId, connectionId, @@ -323,7 +127,16 @@ private boolean updateExpirationTrackers(String partitionId, String connectionId return true; } - Accumulation get(String partitionId, String connectionId, Instant timestamp) { + private ConcurrentHashMap + getHashSetForTimestampWhileExpiringOldBuckets(ExpiringKeyQueue expiringQueue, + EpochMillis timestampMillis) { + return expiringQueue.getHashSetForTimestamp(timestampMillis, + () -> expiringQueue.expireOldSlots(connectionAccumulationMap, + behavioralPolicy, minimumGuaranteedLifetime, timestampMillis) + ); + } + + public Accumulation get(String partitionId, String connectionId, Instant timestamp) { var accumulation = connectionAccumulationMap.get(new ScopedConnectionIdKey(partitionId, connectionId)); if (accumulation == null) { return null; @@ -334,7 +147,7 @@ Accumulation get(String partitionId, String connectionId, Instant timestamp) { return accumulation; } - Accumulation getOrCreate(String partitionId, String connectionId, Instant timestamp) { 
+ public Accumulation getOrCreate(String partitionId, String connectionId, Instant timestamp) { var key = new ScopedConnectionIdKey(partitionId, connectionId); var accumulation = connectionAccumulationMap.computeIfAbsent(key, k->new Accumulation()); if (!updateExpirationTrackers(partitionId, connectionId, new EpochMillis(timestamp), accumulation, 0)) { @@ -344,7 +157,7 @@ Accumulation getOrCreate(String partitionId, String connectionId, Instant timest return accumulation; } - Accumulation remove(String partitionId, String id) { + public Accumulation remove(String partitionId, String id) { var accum = connectionAccumulationMap.remove(new ScopedConnectionIdKey(partitionId, id)); if (accum != null) { accum.newestPacketTimestampInMillis.set(ACCUMULATION_DEAD_SENTINEL); @@ -357,11 +170,11 @@ Accumulation remove(String partitionId, String id) { // connections would take up 1MB of key characters + map overhead) } - Stream values() { + public Stream values() { return connectionAccumulationMap.values().stream(); } - void clear() { + public void clear() { nodeToExpiringBucketMap.clear(); // leave everything else fall aside, like we do for remove() } diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ScopedConnectionIdKey.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ScopedConnectionIdKey.java new file mode 100644 index 000000000..5d2bcf0ff --- /dev/null +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/traffic/expiration/ScopedConnectionIdKey.java @@ -0,0 +1,11 @@ +package org.opensearch.migrations.replay.traffic.expiration; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; + +@AllArgsConstructor +@EqualsAndHashCode +class ScopedConnectionIdKey { + public final String nodeId; + public final String connectionId; +} diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapSequentialTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapSequentialTest.java index d855d0723..e25cef6c9 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapSequentialTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapSequentialTest.java @@ -2,6 +2,8 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.opensearch.migrations.replay.traffic.expiration.BehavioralPolicy; +import org.opensearch.migrations.replay.traffic.expiration.ExpiringTrafficStreamMap; import java.nio.charset.StandardCharsets; import java.time.Duration; @@ -19,7 +21,7 @@ public static void testLinearExpirations(Function connectionGene int expectedExpirationCounts[]) { var expiredAccumulations = new ArrayList(); var expiringMap = new ExpiringTrafficStreamMap(Duration.ofSeconds(window), Duration.ofSeconds(granularity), - new ExpiringTrafficStreamMap.BehavioralPolicy() { + new BehavioralPolicy() { @Override public void onExpireAccumulation(String partitionId, String connectionId, diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapUnorderedTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapUnorderedTest.java index c821ad6b3..0c3e1935f 100644 --- 
a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapUnorderedTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/ExpiringTrafficStreamMapUnorderedTest.java @@ -2,6 +2,8 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.opensearch.migrations.replay.traffic.expiration.BehavioralPolicy; +import org.opensearch.migrations.replay.traffic.expiration.ExpiringTrafficStreamMap; import java.nio.charset.StandardCharsets; import java.time.Duration; @@ -21,7 +23,7 @@ public void testExpirations(Function connectionGenerator, int wi int expectedExpirationCounts[]) { var expiredAccumulations = new ArrayList(); var expiringMap = new ExpiringTrafficStreamMap(Duration.ofSeconds(window), Duration.ofSeconds(granularity), - new ExpiringTrafficStreamMap.BehavioralPolicy() { + new BehavioralPolicy() { @Override public void onExpireAccumulation(String partitionId, String connectionId, From 547a5ad7db0382fdddd286aaa5de00f7e126a876 Mon Sep 17 00:00:00 2001 From: Kartik Ganesh Date: Wed, 28 Jun 2023 13:01:03 -0700 Subject: [PATCH 57/57] [Index configuration tool] Add support for insecure HTTPS endpoint (#218) This commit adds support for parsing the "insecure" flag from the Data Prepper YAML. This translates to a "verify" flag that is passed to the Python requests library. This commit also includes unit tests for this logic. Signed-off-by: Kartik Ganesh --------- Signed-off-by: Kartik Ganesh --- index_configuration_tool/index_operations.py | 4 ++-- index_configuration_tool/main.py | 22 +++++++++++++++++--- index_configuration_tool/tests/test_main.py | 19 ++++++++++++++++- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/index_configuration_tool/index_operations.py b/index_configuration_tool/index_operations.py index 51fa0e19a..22385c5ad 100644 --- a/index_configuration_tool/index_operations.py +++ b/index_configuration_tool/index_operations.py @@ -11,9 +11,9 @@ __INTERNAL_SETTINGS_KEYS = ["creation_date", "uuid", "provided_name", "version", "store"] -def fetch_all_indices(endpoint: str, optional_auth: Optional[tuple] = None) -> dict: +def fetch_all_indices(endpoint: str, optional_auth: Optional[tuple] = None, verify: bool = True) -> dict: actual_endpoint = endpoint + __ALL_INDICES_ENDPOINT - resp = requests.get(actual_endpoint, auth=optional_auth) + resp = requests.get(actual_endpoint, auth=optional_auth, verify=verify) # Remove internal settings result = dict(resp.json()) for index in result: diff --git a/index_configuration_tool/main.py b/index_configuration_tool/main.py index 944354068..8a1cdbf61 100644 --- a/index_configuration_tool/main.py +++ b/index_configuration_tool/main.py @@ -12,6 +12,18 @@ HOSTS_KEY = "hosts" USER_KEY = "username" PWD_KEY = "password" +INSECURE_KEY = "insecure" +CONNECTION_KEY = "connection" + + +# This config key may be either directly in the main dict (for sink) +# or inside a nested dict (for source). The default value is False. 
+def is_insecure(config: dict) -> bool: + if INSECURE_KEY in config: + return config[INSECURE_KEY] + elif CONNECTION_KEY in config and INSECURE_KEY in config[CONNECTION_KEY]: + return config[CONNECTION_KEY][INSECURE_KEY] + return False # TODO Only supports basic auth for now @@ -30,7 +42,9 @@ def get_endpoint_info(plugin_config: dict) -> tuple: def fetch_all_indices_by_plugin(plugin_config: dict) -> dict: endpoint, auth_tuple = get_endpoint_info(plugin_config) - return index_operations.fetch_all_indices(endpoint, auth_tuple) + # verify boolean will be the inverse of the insecure SSL key, if present + should_verify = not is_insecure(plugin_config) + return index_operations.fetch_all_indices(endpoint, auth_tuple, should_verify) def check_supported_endpoint(config: dict) -> Optional[tuple]: @@ -70,8 +84,10 @@ def validate_plugin_config(config: dict, key: str): def validate_pipeline_config(config: dict): - if SOURCE_KEY not in config or SINK_KEY not in config: - raise ValueError("Missing source or sink configuration in Data Prepper pipeline YAML") + if SOURCE_KEY not in config: + raise ValueError("Missing source configuration in Data Prepper pipeline YAML") + if SINK_KEY not in config: + raise ValueError("Missing sink configuration in Data Prepper pipeline YAML") validate_plugin_config(config, SOURCE_KEY) validate_plugin_config(config, SINK_KEY) diff --git a/index_configuration_tool/tests/test_main.py b/index_configuration_tool/tests/test_main.py index cde419328..3174abb96 100644 --- a/index_configuration_tool/tests/test_main.py +++ b/index_configuration_tool/tests/test_main.py @@ -10,6 +10,8 @@ # Constants TEST_KEY = "test_key" +INSECURE_KEY = "insecure" +CONNECTION_KEY = "connection" BASE_CONFIG_SECTION = { TEST_KEY: [{"invalid_plugin1": {"key": "val"}}, {"invalid_plugin2": {}}] } @@ -43,6 +45,21 @@ def setUp(self) -> None: with open(test_constants.PIPELINE_CONFIG_PICKLE_FILE_PATH, "rb") as f: self.loaded_pipeline_config = pickle.load(f) + def test_is_insecure_default_value(self): + self.assertFalse(main.is_insecure({})) + + def test_is_insecure_top_level_key(self): + test_input = {"key": 123, INSECURE_KEY: True} + self.assertTrue(main.is_insecure(test_input)) + + def test_is_insecure_nested_key(self): + test_input = {"key1": 123, CONNECTION_KEY: {"key2": "val", INSECURE_KEY: True}} + self.assertTrue(main.is_insecure(test_input)) + + def test_is_insecure_missing_nested(self): + test_input = {"key1": 123, CONNECTION_KEY: {"key2": "val"}} + self.assertFalse(main.is_insecure(test_input)) + def test_get_auth_returns_none(self): # The following inputs should not return an auth tuple: # - Empty input @@ -186,7 +203,7 @@ def test_validate_pipeline_config_missing_required_keys(self): # - Empty input # - missing output # - missing input - bad_configs = [{}, {"input": ()}, {"output": ()}] + bad_configs = [{}, {"source": {}}, {"sink": {}}] for config in bad_configs: self.assertRaises(ValueError, main.validate_pipeline_config, config)
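As a closing usage sketch for the new flag (the values are hypothetical and not part of the patch), either of the following plugin configurations makes the tool pass verify=False to requests, mirroring the sink-level and source-level placements of the "insecure" key described above:

import yaml
from main import is_insecure  # helper added in this patch; assumes main.py is importable

SNIPPET = """
sink-style:
  hosts: ["https://target-host:443"]
  insecure: true
source-style:
  hosts: ["https://source-host:9200"]
  connection:
    insecure: true
"""

for name, plugin_config in yaml.safe_load(SNIPPET).items():
    should_verify = not is_insecure(plugin_config)
    print(name, "-> requests verify flag:", should_verify)  # False in both cases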