diff --git a/.github/workflows/azure-load-tests.yml b/.github/workflows/azure-load-tests.yml new file mode 100644 index 000000000..3600133d7 --- /dev/null +++ b/.github/workflows/azure-load-tests.yml @@ -0,0 +1,49 @@ +name: Azure Load Tests + +on: + workflow_dispatch: + schedule: + - cron: "0 0 * * 2" # Midnight UTC on Tuesdays + + workflow_call: + secrets: + AZURE_CLIENT_ID: + required: true + AZURE_TENANT_ID: + required: true + AZURE_SUBSCRIPTION_ID: + required: true + +jobs: + loadtest: + name: Load Test + environment: + name: internal + + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + + steps: + # Checkout the repository + - name: Checkout Repository + uses: actions/checkout@v4 + + # Login to Azure using the CLI + - name: Login via Azure CLI + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + # Run the Azure Load Test (the run id uses zero-padded %H and a quoted substitution so it never contains a space) + - name: Run Load Test + run: | + az load test-run create \ + --resource-group "csels-rsti-internal-moderate-rg" \ + --load-test-resource "jeff-load-test" \ + --test-id "9020b745-5fc4-4284-8803-04076ea09650" \ + --test-run-id "run_$(date +"%Y%m%d%H%M%S")" \ + --description "Run by Github Action" diff --git a/adr/008-load-testing.md b/adr/008-load-testing.md index 56f4885b1..13c9f44b3 100644 --- a/adr/008-load-testing.md +++ b/adr/008-load-testing.md @@ -1,6 +1,7 @@ # 8. Load Testing -Date: 2022-12-21 +Initial date: 2022-12-21 +Updated: 2025-01-07 ## Decision @@ -16,11 +17,16 @@ Accepted. Load Testing will assist in determining the performance of a system under real-life load conditions, both normal and extreme. +In January 2025, we added the capability to run load tests automatically on a schedule in Azure. +In the deployed load tests, we're hitting mock ReportStream endpoints.
This allows us to 1) not +bombard ReportStream with unexpected traffic and 2) identify performance issues that are specific +to the Intermediary. + ## Impact ### Positive -- **Scalability:** Locust.io is highly scalable and can simulate millions of users, making it ideal for both small-scale and large-scale load tests. +- **Scalability:** Locust.io is highly scalable and can simulate millions of users, making it ideal for both small-scale and large-scale load tests. - **Python-Based:** Writing tests in Python allows for flexibility and ease of use, especially for teams already familiar with the language. @@ -29,6 +35,9 @@ real-life load conditions, both normal and extreme. - **Cost Efficiency:** Locust.io is open-source, and doesn’t require licensing fees, which can reduce the overall cost of performance testing. +- **Azure:** Running the load tests on a schedule in a more realistic environment gives us more consistent data. + + ### Negative - **Limited Features:** Compared to more feature-rich tools, Locust.io might lack advanced performance monitoring or detailed reporting features. @@ -48,6 +57,12 @@ real-life load conditions, both normal and extreme. - **Resource Usage:** Running large-scale tests using Locust.io may require significant system resources, which could impact cost and infrastructure planning. +- **Locust Future in Azure is Uncertain:** We were able to create a Locust test in Azure in November 2024, but as of January 2025, we were unable to create another one. We've submitted a bug report, but if we remain unable to create Locust tests in Azure, we won't be able to expand this test setup to other environments. + + +- **Azure Load Testing Cannot be Terraformed:** Since Azure Load Testing resources can't be created/managed in Terraform, they must be created manually.
This is more work and more error prone + + ### Related Issues -- #76 +- #76, #1122 diff --git a/app/src/main/java/gov/hhs/cdc/trustedintermediary/external/javalin/DomainsRegistration.java b/app/src/main/java/gov/hhs/cdc/trustedintermediary/external/javalin/DomainsRegistration.java index cee103181..4540455c3 100644 --- a/app/src/main/java/gov/hhs/cdc/trustedintermediary/external/javalin/DomainsRegistration.java +++ b/app/src/main/java/gov/hhs/cdc/trustedintermediary/external/javalin/DomainsRegistration.java @@ -122,6 +122,10 @@ static DomainConnector constructNewDomainConnector(Class handler, boolean isProtected) { return (Context ctx) -> { + ApplicationContext + .clearThreadRegistrations(); // clear this thread's specific registrations from + // its previous use + LOGGER.logInfo(ctx.method().name() + " " + ctx.url()); var request = javalinContextToDomainRequest(ctx); diff --git a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/EtorDomainRegistration.java b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/EtorDomainRegistration.java index 0711d05b7..1a6068b1c 100644 --- a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/EtorDomainRegistration.java +++ b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/EtorDomainRegistration.java @@ -173,6 +173,15 @@ DomainResponse handleResults(DomainRequest request) { } DomainResponse handleMetadata(DomainRequest request) { + // Any new endpoint that will call RS **must** include this check. + if (Boolean.parseBoolean(request.getHeaders().get("load-test")) + && ApplicationContext.isPropertyPresent("REPORT_STREAM_URL_PREFIX")) { + // register the mock RS endpoint for this HTTP request because we don't want to call RS + // for real when doing a load test. 
+ ApplicationContext.registerForThread( + RSEndpointClient.class, MockRSEndpointClient.getInstance()); + } + try { String metadataId = request.getPathParams().get("id"); Optional metadataOptional = @@ -226,6 +235,15 @@ protected DomainResponse handleMessageRequest( boolean markMetadataAsFailed = false; String errorMessage = ""; + // Any new endpoint that will call RS **must** include this check. + if (Boolean.parseBoolean(request.getHeaders().get("load-test")) + && ApplicationContext.isPropertyPresent("REPORT_STREAM_URL_PREFIX")) { + // register the mock RS endpoint for this HTTP request because we don't want to call RS + // for real when doing a load test. + ApplicationContext.registerForThread( + RSEndpointClient.class, MockRSEndpointClient.getInstance()); + } + try { return requestHandler.handle(inboundReportId); } catch (FhirParseException e) { diff --git a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/metadata/partner/PartnerMetadataOrchestrator.java b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/metadata/partner/PartnerMetadataOrchestrator.java index a70308ec7..8597c75b5 100644 --- a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/metadata/partner/PartnerMetadataOrchestrator.java +++ b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/metadata/partner/PartnerMetadataOrchestrator.java @@ -1,5 +1,6 @@ package gov.hhs.cdc.trustedintermediary.etor.metadata.partner; +import gov.hhs.cdc.trustedintermediary.context.ApplicationContext; import gov.hhs.cdc.trustedintermediary.etor.RSEndpointClient; import gov.hhs.cdc.trustedintermediary.etor.messagelink.MessageLink; import gov.hhs.cdc.trustedintermediary.etor.messagelink.MessageLinkException; @@ -31,7 +32,6 @@ public class PartnerMetadataOrchestrator { @Inject PartnerMetadataStorage partnerMetadataStorage; @Inject MessageLinkStorage messageLinkStorage; - @Inject RSEndpointClient rsclient; @Inject Formatter formatter; @Inject Logger logger; @@ -44,6 +44,9 @@ private 
PartnerMetadataOrchestrator() {} public void updateMetadataForInboundMessage(PartnerMetadata partnerMetadata) throws PartnerMetadataException { + // can't @Inject because the implementation can be different for this specific thread + RSEndpointClient rsclient = ApplicationContext.getImplementation(RSEndpointClient.class); + logger.logInfo( "Looking up sender name and timeReceived from RS delivery API for inboundReportId: {}", partnerMetadata.inboundReportId()); @@ -130,6 +133,11 @@ public Optional getMetadata(String inboundReportId) PartnerMetadata partnerMetadata = optionalPartnerMetadata.get(); var outboundReportId = partnerMetadata.outboundReportId(); if (metadataIsStale(partnerMetadata) && outboundReportId != null) { + + // can't @Inject because the implementation can be different for this specific thread + RSEndpointClient rsclient = + ApplicationContext.getImplementation(RSEndpointClient.class); + logger.logInfo( "Receiver name not found in metadata or delivery status still pending, looking up {} from RS history API", outboundReportId); diff --git a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamEndpointClient.java b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamEndpointClient.java index e8fec6f52..dfc84c8ee 100644 --- a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamEndpointClient.java +++ b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamEndpointClient.java @@ -4,11 +4,9 @@ import gov.hhs.cdc.trustedintermediary.etor.RSEndpointClient; import gov.hhs.cdc.trustedintermediary.wrappers.AuthEngine; import gov.hhs.cdc.trustedintermediary.wrappers.Cache; -import gov.hhs.cdc.trustedintermediary.wrappers.HapiFhir; import gov.hhs.cdc.trustedintermediary.wrappers.HttpClient; import gov.hhs.cdc.trustedintermediary.wrappers.HttpClientException; import gov.hhs.cdc.trustedintermediary.wrappers.Logger; -import 
gov.hhs.cdc.trustedintermediary.wrappers.MetricMetadata; import gov.hhs.cdc.trustedintermediary.wrappers.SecretRetrievalException; import gov.hhs.cdc.trustedintermediary.wrappers.Secrets; import gov.hhs.cdc.trustedintermediary.wrappers.formatter.Formatter; @@ -45,13 +43,10 @@ public class ReportStreamEndpointClient implements RSEndpointClient { @Inject private HttpClient client; @Inject private AuthEngine jwt; @Inject private Formatter formatter; - @Inject private HapiFhir fhir; @Inject private Logger logger; @Inject private Secrets secrets; @Inject private Cache cache; - @Inject MetricMetadata metadata; - private static final ReportStreamEndpointClient INSTANCE = new ReportStreamEndpointClient(); public static ReportStreamEndpointClient getInstance() { diff --git a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamSenderHelper.java b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamSenderHelper.java index ed63a24f4..c1bd14e1b 100644 --- a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamSenderHelper.java +++ b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamSenderHelper.java @@ -1,5 +1,6 @@ package gov.hhs.cdc.trustedintermediary.external.reportstream; +import gov.hhs.cdc.trustedintermediary.context.ApplicationContext; import gov.hhs.cdc.trustedintermediary.etor.RSEndpointClient; import gov.hhs.cdc.trustedintermediary.etor.messages.UnableToSendMessageException; import gov.hhs.cdc.trustedintermediary.etor.metadata.EtorMetadataStep; @@ -17,7 +18,6 @@ public class ReportStreamSenderHelper { private static final ReportStreamSenderHelper INSTANCE = new ReportStreamSenderHelper(); - @Inject RSEndpointClient rsclient; @Inject Formatter formatter; @Inject Logger logger; @Inject MetricMetadata metadata; @@ -41,6 +41,10 @@ public Optional sendResultToReportStream(String body, String fhirResourc protected Optional sendToReportStream( 
String body, String fhirResourceId, PartnerMetadataMessageType messageType) throws UnableToSendMessageException { + + // can't @Inject because the implementation can be different for this specific thread + RSEndpointClient rsclient = ApplicationContext.getImplementation(RSEndpointClient.class); + String bearerToken; String rsResponseBody; diff --git a/operations/locustfile.py b/operations/locustfile.py index 03a87ec2a..450c33774 100644 --- a/operations/locustfile.py +++ b/operations/locustfile.py @@ -4,6 +4,7 @@ import urllib.parse import urllib.request import uuid +import os from locust import FastHttpUser, between, events, task from locust.runners import MasterRunner @@ -19,6 +20,8 @@ result_request_body = None auth_request_body = None +in_azure = os.getenv('TEST_RUN_NAME') is not None + class SampleUser(FastHttpUser): # Each task gets called randomly, but the number next to '@task' denotes @@ -69,6 +72,7 @@ def post_message_request(self, endpoint, message): headers={ "Authorization": self.access_token, "RecordId": self.submission_id, + "Load-Test": "true", }, data=message.replace("{{placer_order_id}}", poi), ) @@ -88,7 +92,10 @@ def get_v1_etor_metadata(self): if self.message_api_called: self.client.get( f"{METADATA_ENDPOINT}/{self.submission_id}", - headers={"Authorization": self.access_token}, + headers={ + "Authorization": self.access_token, + "Load-Test": "true", + }, name=f"{METADATA_ENDPOINT}/{{id}}", ) @@ -118,6 +125,10 @@ def test_start(environment): @events.quitting.add_listener def assert_stats(environment): + if in_azure: + # don't evaluate this in Azure because we want the locust process to succeed and Azure does its own test criteria checking + return + if environment.stats.total.fail_ratio > 0.01: logging.error("Test failed due to failure ratio > 1%") environment.process_exit_code = 1 @@ -129,24 +140,37 @@ def assert_stats(environment): def get_auth_request_body(): - # set up the sample request body for the auth endpoint - # using a valid test 
token found in the mock_credentials directory - auth_scope = "report-stream" - with open("mock_credentials/report-stream-valid-token.jwt") as f: - auth_token = f.read() + # Set up the sample request body for the auth endpoint + # using a valid test token. For local testing, the jwt is found in the mock_credentials directory. + # For deployed load tests, the jwt is stored in Azure Key Vault. This jwt expires in December 2029 + if in_azure: + auth_token = os.getenv("trusted-intermediary-valid-token-jwt") + else: + with open("mock_credentials/trusted-intermediary-valid-token.jwt") as f: + auth_token = f.read() + params = urllib.parse.urlencode( - {"scope": auth_scope, "client_assertion": auth_token.strip()} + {"scope": "trusted-intermediary", "client_assertion": auth_token.strip()} ) + return params.encode("utf-8") def get_order_fhir_message(): # read the sample request body for the orders endpoint - with open("examples/Test/e2e/orders/002_ORM_O01_short.fhir", "r") as f: + file_path = "002_ORM_O01_short.fhir" + if not in_azure: + file_path = "examples/Test/e2e/orders/" + file_path + + with open(file_path, "r") as f: return f.read() def get_result_fhir_message(): # read the sample request body for the results endpoint - with open("examples/Test/e2e/results/001_ORU_R01_short.fhir", "r") as f: + file_path = "001_ORU_R01_short.fhir" + if not in_azure: + file_path = "examples/Test/e2e/results/" + file_path + + with open(file_path, "r") as f: return f.read() diff --git a/shared/src/main/java/gov/hhs/cdc/trustedintermediary/context/ApplicationContext.java b/shared/src/main/java/gov/hhs/cdc/trustedintermediary/context/ApplicationContext.java index 61b261c72..1d628bf06 100644 --- a/shared/src/main/java/gov/hhs/cdc/trustedintermediary/context/ApplicationContext.java +++ b/shared/src/main/java/gov/hhs/cdc/trustedintermediary/context/ApplicationContext.java @@ -13,6 +13,7 @@ import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.Arrays; 
+import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -28,6 +29,8 @@ public class ApplicationContext { protected static final Map, Object> OBJECT_MAP = new ConcurrentHashMap<>(); + protected static final InheritableThreadLocal, Object>> THREAD_OBJECT_MAP = + new InheritableThreadLocal<>(); protected static final Map TEST_ENV_VARS = new ConcurrentHashMap<>(); protected static final Set IMPLEMENTATIONS = new HashSet<>(); @@ -40,7 +43,39 @@ public static void register(Class clazz, Object implementation) { IMPLEMENTATIONS.add(implementation.getClass()); } + /** + * Registers an implementation for a class _only_ for the current executing thread (which + * currently is one-to-one with an HTTP request). + */ + public static void registerForThread(Class clazz, Object implementation) { + Map, Object> threadObjectMap = THREAD_OBJECT_MAP.get(); + if (threadObjectMap == null) { + threadObjectMap = new HashMap<>(); + } + + threadObjectMap.put(clazz, implementation); + + THREAD_OBJECT_MAP.set(threadObjectMap); + + // The implementation may never have had anything injected into it + // (e.g. it wasn't part of the bootstrapping implementations registered into the + // ApplicationContext), + // so inject into the implementation now. + injectIntoNonSingleton(implementation); + } + + /** Removes the stored implementations for the current thread that calls this method. */ + public static void clearThreadRegistrations() { + THREAD_OBJECT_MAP.remove(); + } + public static T getImplementation(Class clazz) { + // check the thread local map first + Map, Object> threadObjectMap = THREAD_OBJECT_MAP.get(); + if (threadObjectMap != null && threadObjectMap.containsKey(clazz)) { + return (T) threadObjectMap.get(clazz); + } + T object = (T) OBJECT_MAP.get(clazz); if (object == null) {