diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..31c8f3d
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
index 5d903b0..96dc6e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,3 +170,4 @@ cython_debug/
 local-test
 local-test/*
+.DS_Store
diff --git a/demo/.DS_Store b/demo/.DS_Store
new file mode 100644
index 0000000..fc99d1f
Binary files /dev/null and b/demo/.DS_Store differ
diff --git a/docs/.DS_Store b/docs/.DS_Store
new file mode 100644
index 0000000..96146a4
Binary files /dev/null and b/docs/.DS_Store differ
diff --git a/requirements.txt b/requirements.txt
index 199e458..c368cd1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,6 @@ click~=8.0
 pydantic>=2.1.0,<3.0
 rich>=13.7.0,<14.0
 jinja2>=3.1.3,<4.0
-jsonpath-ng>=1.6.1,<2.0
\ No newline at end of file
+jsonpath-ng>=1.6.1,<2.0
+aws-cdk-lib==2.155.0
+constructs>=10.0.0,<11.0.0
\ No newline at end of file
diff --git a/src/.DS_Store b/src/.DS_Store
new file mode 100644
index 0000000..fa26f4d
Binary files /dev/null and b/src/.DS_Store differ
diff --git a/stepfunctions/.DS_Store b/stepfunctions/.DS_Store
new file mode 100644
index 0000000..2e44791
Binary files /dev/null and b/stepfunctions/.DS_Store differ
diff --git a/stepfunctions/.gitignore b/stepfunctions/.gitignore
new file mode 100644
index 0000000..e9071d0
--- /dev/null
+++ b/stepfunctions/.gitignore
@@ -0,0 +1,12 @@
+*.swp
+package-lock.json
+__pycache__
+.pytest_cache
+.venv
+*.egg-info
+*.dist-info
+
+# CDK asset staging directory
+.cdk.staging
+cdk.out
+.DS_Store
diff --git a/stepfunctions/README.md b/stepfunctions/README.md
new file mode 100644
index 0000000..af346c3
--- /dev/null
+++ b/stepfunctions/README.md
@@ -0,0 +1,141 @@
+# Bedrock Agent Evaluation Framework
+
+This project implements an automated evaluation framework for Amazon Bedrock Agents using AWS CDK, Step Functions, and Lambda.
+
+## Overview
+
+The framework automates the process of updating Bedrock Agents with new prompts, creating aliases, running evaluation scenarios, and cleaning up resources. It uses AWS Step Functions to orchestrate the workflow and AWS Lambda functions to perform individual tasks.
+
+The example provided is for an energy chatbot use case.
+
+## Components
+
+1. **CDK Stack (StepfunctionsStack)**: Defines the infrastructure, including Lambda functions, the Step Functions state machine, and associated IAM roles.
+
+2. **Lambda Functions**:
+   - `generate_map`: Generates evaluation scenarios from S3 input.
+   - `check_agent_status_1` and `check_agent_status_2`: Check the status of Bedrock Agents.
+   - `update_bedrock_agent`: Updates the Bedrock Agent with new instructions.
+   - `create_alias`: Creates an alias for the updated agent.
+   - `run_test`: Executes evaluation scenarios using the `agenteval` library.
+   - `delete_alias`: Removes the temporary alias after evaluation.
+
+3. **Step Functions State Machine**: Orchestrates the evaluation workflow, including agent updates, status checks, and scenario execution.
+
+4. **S3 Bucket**: Stores evaluation prompts and results.
+
+5. **EventBridge Rule**: Triggers the Step Functions workflow when new evaluation prompts are uploaded to S3.
+
+## Workflow
+
+1. New evaluation prompts are uploaded to the S3 bucket.
+2. The EventBridge rule triggers the Step Functions state machine.
+3. The state machine updates the Bedrock Agent with new instructions.
+4. An alias is created for the updated agent.
+5. Evaluation scenarios are executed using the `agenteval` library.
+6. Results are stored in the S3 bucket.
+7. The temporary alias is deleted.
+
+## Setup and Deployment
+
+1. Ensure you have the AWS CDK installed and configured.
+2. Install the Python project dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+3. Deploy the stack:
+   ```
+   cdk deploy
+   ```
+
+## Usage
+
+To run an evaluation:
+
+1. Prepare an evaluation JSON file with prompts and customer profiles.
+2. Upload the file to the S3 bucket under the `evaluation_prompts/` prefix, as shown in the example below.
+3. The evaluation process will start automatically.
+4. Results will be available in the S3 bucket under the `results/` prefix.
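+
+For example, you can upload the sample file with boto3. This is a sketch: the
+bucket name is a placeholder for the `EvaluationBucket` that the stack creates,
+and the JSON file must contain the top-level keys `agent_id`, `agent_name`,
+`prompts`, and `customer_profiles` that the `generate_map` function expects.
+
+```
+import boto3
+
+s3 = boto3.client("s3")
+
+# Placeholder name: look up the real bucket in the deployed stack's resources
+bucket = "stepfunctionsstack-evaluationbucket-xxxxxxxx"
+
+# Uploading under the evaluation_prompts/ prefix triggers the EventBridge rule
+s3.upload_file(
+    "example_prompt_jsons/prompts_scenarios.json",
+    bucket,
+    "evaluation_prompts/prompts_scenarios.json",
+)
+```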
+
+## Notes
+
+- Ensure proper IAM permissions are set up for accessing Bedrock, S3, and other AWS services.
+- The `agenteval` library is assumed to be provided as a custom Lambda layer.
+
+
+# CDK instructions
+
+The `cdk.json` file tells the CDK Toolkit how to execute your app.
+
+This project is set up like a standard Python project. The initialization
+process also creates a virtualenv within this project, stored under the `.venv`
+directory. To create the virtualenv it assumes that there is a `python3`
+(or `python` for Windows) executable in your path with access to the `venv`
+package. If for any reason the automatic creation of the virtualenv fails,
+you can create the virtualenv manually.
+
+To manually create a virtualenv on MacOS and Linux:
+
+```
+$ python3 -m venv .venv
+```
+
+After the init process completes and the virtualenv is created, you can use the following
+step to activate your virtualenv.
+
+```
+$ source .venv/bin/activate
+```
+
+If you are on a Windows platform, you would activate the virtualenv like this:
+
+```
+% .venv\Scripts\activate.bat
+```
+
+Once the virtualenv is activated, you can install the required dependencies.
+
+```
+$ pip install -r requirements.txt
+```
+
+At this point you can now synthesize the CloudFormation template for this code.
+
+```
+$ cdk synth
+```
+
+To add additional dependencies, for example other CDK libraries, just add
+them to your `requirements.txt` file and rerun the `pip install -r requirements.txt`
+command.
+
+## Useful commands
+
+ * `cdk ls`          list all stacks in the app
+ * `cdk synth`       emits the synthesized CloudFormation template
+ * `cdk deploy`      deploy this stack to your default AWS account/region
+ * `cdk diff`        compare deployed stack with current state
+ * `cdk docs`        open CDK documentation
+
+Enjoy!
diff --git a/stepfunctions/app.py b/stepfunctions/app.py
new file mode 100644
index 0000000..bf4ff67
--- /dev/null
+++ b/stepfunctions/app.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+import os
+
+import aws_cdk as cdk
+
+from stepfunctions.stepfunctions_stack import StepfunctionsStack
+
+
+app = cdk.App()
+StepfunctionsStack(app, "StepfunctionsStack",
+    # If you don't specify 'env', this stack will be environment-agnostic.
+    # Account/Region-dependent features and context lookups will not work,
+    # but a single synthesized template can be deployed anywhere.
+
+    # Uncomment the next line to specialize this stack for the AWS Account
+    # and Region that are implied by the current CLI configuration.
+
+    #env=cdk.Environment(account=os.getenv('CDK_DEFAULT_ACCOUNT'), region=os.getenv('CDK_DEFAULT_REGION')),
+
+    # Uncomment the next line if you know exactly what Account and Region you
+    # want to deploy the stack to.
+
+    # env=cdk.Environment(region='us-east-1'),
+
+    # For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html
+    )
+
+app.synth()
diff --git a/stepfunctions/cdk.json b/stepfunctions/cdk.json
new file mode 100644
index 0000000..5553cf4
--- /dev/null
+++ b/stepfunctions/cdk.json
@@ -0,0 +1,70 @@
+{
+  "app": "python3 app.py",
+  "watch": {
+    "include": [
+      "**"
+    ],
+    "exclude": [
+      "README.md",
+      "cdk*.json",
+      "requirements*.txt",
+      "source.bat",
+      "**/__init__.py",
+      "**/__pycache__",
+      "tests"
+    ]
+  },
+  "context": {
+    "@aws-cdk/aws-lambda:recognizeLayerVersion": true,
+    "@aws-cdk/core:checkSecretUsage": true,
+    "@aws-cdk/core:target-partitions": [
+      "aws",
+      "aws-cn"
+    ],
+    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
+    "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
+    "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true,
+    "@aws-cdk/aws-iam:minimizePolicies": true,
+    "@aws-cdk/core:validateSnapshotRemovalPolicy": true,
+    "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
+    "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true,
+    "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true,
+    "@aws-cdk/aws-apigateway:disableCloudWatchRole": true,
+    "@aws-cdk/core:enablePartitionLiterals": true,
+    "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true,
+    "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true,
+    "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true,
+    "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true,
+    "@aws-cdk/aws-route53-patters:useCertificate": true,
+    "@aws-cdk/customresources:installLatestAwsSdkDefault": false,
+    "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true,
+    "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true,
+    "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true,
+    "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true,
+    "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true,
+    "@aws-cdk/aws-redshift:columnId": true,
+    "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true,
+    "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true,
+    "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true,
+    "@aws-cdk/aws-kms:aliasNameRef": true,
+    "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true,
+    "@aws-cdk/core:includePrefixInUniqueNameGeneration": true,
+    "@aws-cdk/aws-efs:denyAnonymousAccess": true,
+    "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true,
+    "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true,
+    "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true,
+    "@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true,
+    "@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true,
+    "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true,
+    "@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForCodeCommitSource": true,
+    "@aws-cdk/aws-cloudwatch-actions:changeLambdaPermissionLogicalIdForLambdaAction": true,
+    "@aws-cdk/aws-codepipeline:crossAccountKeysDefaultValueToFalse": true,
+    "@aws-cdk/aws-codepipeline:defaultPipelineTypeToV2": true,
+    "@aws-cdk/aws-kms:reduceCrossAccountRegionPolicyScope": true,
+    "@aws-cdk/aws-eks:nodegroupNameAttribute": true,
+    "@aws-cdk/aws-ec2:ebsDefaultGp3Volume": true,
+    "@aws-cdk/aws-ecs:removeDefaultDeploymentAlarm": true,
+    "@aws-cdk/custom-resources:logApiResponseDataPropertyTrueDefault": false,
"@aws-cdk/aws-s3:keepNotificationInImportedBucket": false + } +} diff --git a/stepfunctions/example_prompt_jsons/prompts_scenarios.json b/stepfunctions/example_prompt_jsons/prompts_scenarios.json new file mode 100644 index 0000000..936a47a --- /dev/null +++ b/stepfunctions/example_prompt_jsons/prompts_scenarios.json @@ -0,0 +1,158 @@ +{ "agent_id" : "ABCDEFGHIJ", + "agent_name": "agent_name", + "prompts": [ + { + "id":"1", + "prompt": "You are a customer-facing energy advisor with twenty years of experience at the UK's leading energy providers. Your role is to:\n1) Create a profile of the user by asking a few clarifying questions\n2) Generate a personalised recommendation to the user to solve their problem\n\nYou are empathetic and compassionate. You approach each question with care. You understand that rising energy prices can be a source of strain.\n\nYou are pragmatic. Do not make information up, if you do not know the answer - please be honest.\nBe friendly. Keep a conversational tone. Ask the customer questions one at a time.\nOnly ask a maximum of five questions before giving your personalised recommendation." + }, + + { + "id":"2", + "prompt": "You are a seasoned energy consultant with two decades of experience working for top UK energy companies. Your task is to:\n1)Build a customer profile through a brief series of targeted questions\n2)Offer a tailored solution to address the customer's energy-related concerns\nApproach each interaction with empathy and understanding, recognising the potential stress caused by increasing energy costs. Maintain a practical mindset and prioritise accuracy over speculation. Engage in a friendly, conversatinal manner, posing questions one at a time. Limit your inquiry to no more than five questions before presenting your customised recommendation" + }, + { + "id":"3", + "prompt": "You're an expert energy consultant who has spent two decades with the UK's top energy providers. Your mission is to:\n1)Craft a user profile using a few well-chosen questions\n2)Deliver a custom-tailored recommendation to address the user's energy concerns\n\nApproach each interaction with genuine care and understanding, recognizing the potential anxiety surrounding energy costs. Be practical in your approach, and honest about any limitations in your knowledge. Maintain a friendly, conversational tone throughout, asking questions one at a time. 
Limit yourself to a maximum of five questions before presenting your personalized recommendation" + } + ], + "customer_profiles":[ + { + "id": 1, + "profile": "Single Professional", + "household_size": 1, + "demography": "30-year-old urban professional", + "appliances": [ + "Laptop", "smartphone", "LED TV", "microwave", + "energy-efficient washer and dryer", "smart thermostat", + "electric car charger" + ], + "energy_usage": "Moderate, primarily in the evening and weekends", + "tarrif": "standard variable", + "payment_type": "on demand" + }, + { + "id": 2, + "profile": "Young Couple", + "household_size": 2, + "demography": "25-35 years old, urban", + "appliances": [ + "Two laptops", "two smartphones", "LED TV", "dishwasher", + "energy-efficient refrigerator", "smart home devices", + "washer and dryer" + ], + "energy_usage": "Moderate, spread throughout the day", + "tarrif": "dual fuel", + "payment_type": "direct debit" + }, + { + "id": 3, + "profile": "Family with Young Children", + "household_size": 4, + "demography": "35-45 years old parents with two children under 10, suburban", + "appliances": [ + "Multiple TVs", "gaming console", "desktop computer", "laptops", + "smartphones", "refrigerator", "dishwasher", "washer", "dryer", + "electric oven", "microwave", "air conditioning" + ], + "energy_usage": "High, spread throughout the day, peaks in the evening", + "tarrif": "pre-payment", + "payment_type":"pay bill in full within 14 days" + }, + { + "id": 4, + "profile": "Single Senior Citizen", + "household_size": 1, + "demography": "70-year-old retired individual, rural", + "appliances": [ + "TV", "radio", "landline phone", "microwave", "refrigerator", + "washer", "dryer", "medical equipment (e.g., oxygen concentrator)" + ], + "energy_usage": "Low to moderate, primarily during the day", + "tarrif":"standard variable", + "payment_type":"pay bill in full by Direct Debit" + }, + { + "id": 5, + "profile": "Roommates Sharing Apartment", + "household_size": 3, + "demography": "20-30 years old, urban", + "appliances": [ + "Three laptops", "three smartphones", "shared TV", + "shared kitchen appliances (microwave, refrigerator, oven)", + "washer and dryer" + ], + "energy_usage": "Moderate to high, varies throughout the day", + "tarrif": "dual fuel", + "payment_type":"on demand" + }, + { + "id": 6, + "profile": "Large Family", + "household_size": 6, + "demography": "40-50 years old parents with four children, suburban", + "appliances": [ + "Multiple TVs", "gaming consoles", "multiple laptops and smartphones", + "refrigerator", "two dishwashers", "washer and dryer", "microwave", + "electric oven", "central air conditioning" + ], + "energy_usage": "Very high, continuous throughout the day", + "tarrif":"pre-payment", + "payment_type":"direct debit" + }, + { + "id": 7, + "profile": "Eco-Conscious Couple", + "household_size": 2, + "demography": "30-40 years old, urban", + "appliances": [ + "Energy-efficient appliances", "solar panels", "electric car charger", + "LED lighting", "smart home devices", "minimalistic approach to electronic devices" + ], + "energy_usage": "Low to moderate, optimized for energy savings", + "tarrif": "standard variable", + "payment_type":"pay bill in full within 14 days" + }, + { + "id": 8, + "profile": "Student Living Alone", + "household_size": 1, + "demography": "20-year-old college student, urban", + "appliances": [ + "Laptop", "smartphone", "small TV", "microwave", "mini-fridge", + "electric kettle", "shared washer and dryer in building" + ], + "energy_usage": "Low, primarily 
in the evening and weekends",
+      "tarrif": "dual fuel",
+      "payment_type":"pay bill in full by Direct Debit"
+    },
+    {
+      "id": 9,
+      "profile": "Retired Couple",
+      "household_size": 2,
+      "demography": "65-75 years old, rural",
+      "appliances": [
+        "TV", "desktop computer", "smartphones", "refrigerator",
+        "microwave", "washer and dryer", "space heaters",
+        "medical equipment (e.g., CPAP machine)"
+      ],
+      "energy_usage": "Moderate, primarily during the day",
+      "tarrif": "pre-payment",
+      "payment_type":"on demand"
+    },
+    {
+      "id": 10,
+      "profile": "Home-Based Business Owner",
+      "household_size": 1,
+      "demography": "45-year-old entrepreneur, suburban",
+      "appliances": [
+        "Desktop computer", "laptop", "multiple smartphones", "printer",
+        "fax machine", "energy-efficient refrigerator", "microwave",
+        "washer and dryer"
+      ],
+      "energy_usage": "High, continuous throughout the day",
+      "tarrif": "standard variable",
+      "payment_type":"direct debit"
+    }
+
+  ]
+}
\ No newline at end of file
diff --git a/stepfunctions/layers/agent-evaluation/requirements.txt b/stepfunctions/layers/agent-evaluation/requirements.txt
new file mode 100644
index 0000000..8885a87
--- /dev/null
+++ b/stepfunctions/layers/agent-evaluation/requirements.txt
@@ -0,0 +1 @@
+agent-evaluation
\ No newline at end of file
diff --git a/stepfunctions/layers/aws-lambda-powertools/requirements.txt b/stepfunctions/layers/aws-lambda-powertools/requirements.txt
new file mode 100644
index 0000000..ed0b171
--- /dev/null
+++ b/stepfunctions/layers/aws-lambda-powertools/requirements.txt
@@ -0,0 +1 @@
+aws-lambda-powertools
\ No newline at end of file
diff --git a/stepfunctions/layers/jinja2/requirements.txt b/stepfunctions/layers/jinja2/requirements.txt
new file mode 100644
index 0000000..1c579e7
--- /dev/null
+++ b/stepfunctions/layers/jinja2/requirements.txt
@@ -0,0 +1 @@
+jinja2
\ No newline at end of file
diff --git a/stepfunctions/layers/pydantic/requirements.txt b/stepfunctions/layers/pydantic/requirements.txt
new file mode 100644
index 0000000..59cc1e9
--- /dev/null
+++ b/stepfunctions/layers/pydantic/requirements.txt
@@ -0,0 +1 @@
+pydantic
\ No newline at end of file
diff --git a/stepfunctions/layers/pyyaml/requirements.txt b/stepfunctions/layers/pyyaml/requirements.txt
new file mode 100644
index 0000000..4818cc5
--- /dev/null
+++ b/stepfunctions/layers/pyyaml/requirements.txt
@@ -0,0 +1 @@
+pyyaml
\ No newline at end of file
diff --git a/stepfunctions/requirements-dev.txt b/stepfunctions/requirements-dev.txt
new file mode 100644
index 0000000..9270945
--- /dev/null
+++ b/stepfunctions/requirements-dev.txt
@@ -0,0 +1 @@
+pytest==6.2.5
diff --git a/stepfunctions/requirements.txt b/stepfunctions/requirements.txt
new file mode 100644
index 0000000..c0a15e4
--- /dev/null
+++ b/stepfunctions/requirements.txt
@@ -0,0 +1,2 @@
+aws-cdk-lib==2.155.0
+constructs>=10.0.0,<11.0.0
diff --git a/stepfunctions/source.bat b/stepfunctions/source.bat
new file mode 100644
index 0000000..9e1a834
--- /dev/null
+++ b/stepfunctions/source.bat
@@ -0,0 +1,13 @@
+@echo off
+
+rem The sole purpose of this script is to make the command
+rem
+rem source .venv/bin/activate
+rem
+rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows.
+rem On Windows, this command just runs this batch file (the argument is ignored).
+rem
+rem Now we don't need to document a Windows command for activating a virtualenv.
+
+echo Executing .venv\Scripts\activate.bat for you
+.venv\Scripts\activate.bat
diff --git a/stepfunctions/stepfunctions/__init__.py b/stepfunctions/stepfunctions/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/stepfunctions/stepfunctions/functions/check_agent_status_1/index.py b/stepfunctions/stepfunctions/functions/check_agent_status_1/index.py
new file mode 100644
index 0000000..3a5ee67
--- /dev/null
+++ b/stepfunctions/stepfunctions/functions/check_agent_status_1/index.py
@@ -0,0 +1,34 @@
+import boto3
+import json
+import os
+
+s3_client = boto3.client('s3')
+bedrock_agent = boto3.client('bedrock-agent')
+
+from aws_lambda_powertools import Logger
+logger = Logger()
+
+def handler(event, context):
+
+    agent_id = event["agent_id"]
+
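+    # Assumption (not from the original source): Bedrock agents pass through
+    # transient statuses such as CREATING, UPDATING, VERSIONING and PREPARING;
+    # the state machine polls this value in a Choice/Wait loop until it settles.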
+    logger.info("Getting agent status")
+    try:
+        response = bedrock_agent.get_agent(
+            agentId=agent_id
+        )
+        agent_status = response["agent"]["agentStatus"]
+        logger.info(f"Agent status: {agent_status}")
+    except Exception as e:
+        logger.error(f"Error getting agent status: {e}")
+        # Fail the task rather than returning an undefined status
+        raise
+
+    return {
+        'statusCode': 200,
+        'agent_id': agent_id,
+        'agent_status': agent_status
+    }
\ No newline at end of file
diff --git a/stepfunctions/stepfunctions/functions/check_agent_status_2/index.py b/stepfunctions/stepfunctions/functions/check_agent_status_2/index.py
new file mode 100644
index 0000000..eb3fd07
--- /dev/null
+++ b/stepfunctions/stepfunctions/functions/check_agent_status_2/index.py
@@ -0,0 +1,32 @@
+import boto3
+import json
+import os
+
+s3_client = boto3.client('s3')
+bedrock_agent = boto3.client('bedrock-agent')
+
+from aws_lambda_powertools import Logger
+logger = Logger()
+
+def handler(event, context):
+
+    agent_id = event["update_output"]["agentid"]
+
+    logger.info("Getting agent status")
+    try:
+        response = bedrock_agent.get_agent(
+            agentId=agent_id
+        )
+        agent_status = response["agent"]["agentStatus"]
+        logger.info(f"Agent status: {agent_status}")
+    except Exception as e:
+        logger.error(f"Error getting agent: {e}")
+        # Fail the task rather than returning an undefined status
+        raise
+
+    return {
+        'statusCode': 200,
+        'agent_id': agent_id,
+        'agent_status': agent_status
+    }
diff --git a/stepfunctions/stepfunctions/functions/create_alias/index.py b/stepfunctions/stepfunctions/functions/create_alias/index.py
new file mode 100644
index 0000000..a75b644
--- /dev/null
+++ b/stepfunctions/stepfunctions/functions/create_alias/index.py
@@ -0,0 +1,39 @@
+import json
+import boto3
+import uuid
+from aws_lambda_powertools import Logger
+
+logger = Logger()
+
+def handler(event, context):
+
+    bedrock_agent = boto3.client('bedrock-agent')
+
+    agent_alias = str(uuid.uuid4())
+    agent_id = event["update_output"]["agentid"]
+
+    logger.info("Creating Agent Alias")
+    try:
+        alias_resp = bedrock_agent.create_agent_alias(
+            agentAliasName=agent_alias,
+            agentId=agent_id
+        )
+        logger.info(f"Create Alias Response: {alias_resp}")
+    except Exception as e:
+        logger.error(f"Error creating alias: {e}")
+        # Fail the task; alias_resp is undefined if creation failed
+        raise
+
+    agent_id = alias_resp["agentAlias"]["agentId"]
+    agent_alias_id = alias_resp["agentAlias"]["agentAliasId"]
+    agent_alias_name = alias_resp["agentAlias"]["agentAliasName"]
+
+    # Pass the prompt and scenarios through so the evaluation Map state can fan out
+    return {
+        'prompt': event['prompt'],
+        'agent_id': agent_id,
+        'agent_alias_id': agent_alias_id,
+        'agent_alias_name': agent_alias_name,
+        'scenarios': event['scenarios']
+    }
\ No newline at end of file
diff --git a/stepfunctions/stepfunctions/functions/delete_alias/index.py b/stepfunctions/stepfunctions/functions/delete_alias/index.py
new file mode 100644
index 0000000..0e5c87b
--- /dev/null
+++ b/stepfunctions/stepfunctions/functions/delete_alias/index.py
@@ -0,0 +1,31 @@
+import json
+import boto3
+import uuid
+import os
+from aws_lambda_powertools import Logger
+
+logger = Logger()
+
+def handler(event, context):
+
+    agent_id = event["agent_id"]
+    agent_alias_id = event["agent_alias_id"]
+
+    bedrock_agent = boto3.client('bedrock-agent')
+    logger.info("Deleting Agent Alias")
+    try:
+        response = bedrock_agent.delete_agent_alias(
+            agentAliasId=agent_alias_id,
+            agentId=agent_id
+        )
+        logger.info(f"Delete alias response: {response}")
+    except Exception as e:
+        logger.error(f"Error deleting agent alias: {e}")
+
+    return {
+        'statusCode': 200,
+        'agentid': agent_id
+    }
+
diff --git a/stepfunctions/stepfunctions/functions/generate_map/index.py b/stepfunctions/stepfunctions/functions/generate_map/index.py
new file mode 100644
index 0000000..873b7e7
--- /dev/null
+++ b/stepfunctions/stepfunctions/functions/generate_map/index.py
@@ -0,0 +1,48 @@
+import boto3
+import json
+import os
+
+s3_client = boto3.client('s3')
+
+from aws_lambda_powertools import Logger
+
+logger = Logger()
+
+def handler(event, context):
+
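+    # The state machine is invoked by an EventBridge rule for S3 "Object Created"
+    # events, so the payload carries the bucket and key roughly as:
+    #   {"detail": {"bucket": {"name": "..."}, "object": {"key": "evaluation_prompts/..."}}}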
+    bucket = event["detail"]["bucket"]["name"]
+    key = event["detail"]["object"]["key"]
+
+    logger.info("Fetching scenarios")
+    try:
+        scenario_json = s3_client.get_object(Bucket=bucket, Key=key)
+        text = json.loads(scenario_json["Body"].read())
+        logger.info(text)
+    except Exception as e:
+        logger.error(f"Error getting object: {e}")
+        # Fail the task; text is undefined if the object could not be read
+        raise
+
+    prompts = text['prompts']
+    profiles = text['customer_profiles']
+
+    # Pair every prompt with the full set of customer profiles so the outer
+    # Map state can iterate prompts and the inner Map state can iterate profiles
+    scenarios = []
+
+    for prompt in prompts:
+        item = {
+            'prompt': prompt['prompt'],
+            'scenarios': profiles
+        }
+        scenarios.append(item)
+
+    return {
+        'statusCode': 200,
+        'agent_id': text["agent_id"],
+        'agent_name': text["agent_name"],
+        'body': scenarios
+    }
\ No newline at end of file
diff --git a/stepfunctions/stepfunctions/functions/run_test/index.py b/stepfunctions/stepfunctions/functions/run_test/index.py
new file mode 100644
index 0000000..b6b0a6d
--- /dev/null
+++ b/stepfunctions/stepfunctions/functions/run_test/index.py
@@ -0,0 +1,138 @@
+import yaml
+import json
+import datetime
+import os
+import shutil
+import threading
+import time
+import boto3
+import uuid
+
+from agenteval.runner import Runner
+from agenteval.plan import Plan
+
+s3_client = boto3.client('s3')
+
+
+def handler(event, context):
+
+    scenario = event['scenario']
+    prompt = event['prompt']
+    agent_id = event['agent_id']
+    agent_alias_id = event['agent_alias_id']
+    agent_alias_name = event['agent_alias_name']
+    bucket_name = os.environ["EVALUATION_BUCKET"]
+    uid = uuid.uuid4()
+
+    user_profile = {
+        'demographic': scenario['demography'],
+        'household_size': scenario['household_size'],
+        'appliances': scenario['appliances'],
+        'energy_usage': scenario['energy_usage'],
+        'tariff': scenario['tarrif'],
+        'payment_type': scenario['payment_type']
+    }
+
+    profile_str = yaml.safe_dump(user_profile, default_flow_style=False, sort_keys=False)
+
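+    # The dict below is written out as an agenteval test plan (agenteval.yml):
+    # an evaluator model, a Bedrock agent target, and one multi-turn test whose
+    # steps feed the customer profile back to the agent until it recommends.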
+    yaml_data = {
+        'evaluator': {
+            'model': 'claude-3',
+            'region': 'us-east-1'
+        },
+        'target': {
+            'type': 'bedrock-agent',
+            'bedrock_agent_id': agent_id,
+            'bedrock_agent_alias_id': agent_alias_id
+        },
+        'tests': {
+            'provide recommendation to customer in need': {
+                'profile': user_profile,
+                'max_turns': 10,
+                'steps': [
+                    'Ask the agent how you can reduce your energy bills',
+                    "Respond to the agent's questions using the details in:",
+                    profile_str,
+                    "Respond to the agent's questions using the details in:",
+                    profile_str,
+                    "Respond to the agent's questions using the details in:",
+                    profile_str,
+                    "Respond to the agent's questions using the details in:",
+                    profile_str
+                ],
+                'expected_results': [
+                    'The agent asks the user questions to create a profile',
+                    'The agent asks the user questions to create a profile',
+                    'The agent asks the user questions to create a profile',
+                    'The agent asks the user questions to create a profile',
+                    'The agent returns a recommendation'
+                ]
+            }
+        }
+    }
+
+    # Convert to YAML
+    yaml_output = yaml.safe_dump(yaml_data, sort_keys=False, default_flow_style=False)
+
+    yaml_dir = "/tmp/plan"
+    local_yaml_path = f"{yaml_dir}/agenteval.yml"
+    os.makedirs(os.path.dirname(local_yaml_path), exist_ok=True)
+
+    with open(local_yaml_path, "w") as file:
+        file.write(yaml_output)
+
+    plan = Plan.load(plan_dir=yaml_dir, filter=None)
+
+    now = datetime.datetime.now()
+    created_at = now.strftime("%Y-%m-%d %H:%M:%S")
+    test_result_dir = "/tmp/results/"
+
+    runner = Runner(
+        plan=plan,
+        verbose=False,
+        num_threads=None,
+        work_dir=test_result_dir
+    )
+
+    try:
+        runner_thread = threading.Thread(target=runner.run)
+        runner_thread.start()
+
+        # Poll until every test has a result
+        num_completed = 0
+        while num_completed < runner.num_tests:
+            time.sleep(1)
+            num_completed = len(list(filter(lambda x: x is not None, runner.results.values())))
+
+        runner_thread.join()
+        now = datetime.datetime.now()
+        status = "completed"
+        finished_at = now.strftime("%Y-%m-%d %H:%M:%S")
+
+        test_passed_rate = (
+            f"{runner.num_tests - runner.num_failed}/{runner.num_tests}"
+        )
+
+        with open(os.path.join(test_result_dir, "agenteval_summary.md")) as f:
+            result = f.read()
+
+        s3_key = f"results/{agent_alias_name}/{uid}/results.md"
+        s3_client.put_object(Bucket=bucket_name, Key=s3_key, Body=result)
+
+    except Exception as e:
+        print(f"Error running evaluation: {e}")
+        status = "error"
+        # Defaults so the return below is well-defined when the run fails
+        finished_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        test_passed_rate = None
+
+    return {
+        'created_at': created_at,
+        'finished_at': finished_at,
+        'target_type': yaml_data["target"]["type"],
+        'status': status,
+        'test_passed_rate': test_passed_rate
+    }
\ No newline at end of file
diff --git a/stepfunctions/stepfunctions/functions/update_bedrock_agent/index.py b/stepfunctions/stepfunctions/functions/update_bedrock_agent/index.py
new file mode 100644
index 0000000..3ba810c
--- /dev/null
+++ b/stepfunctions/stepfunctions/functions/update_bedrock_agent/index.py
@@ -0,0 +1,48 @@
+import json
+import boto3
+import uuid
+import os
+from aws_lambda_powertools import Logger
+
+logger = Logger()
+
+@logger.inject_lambda_context
+def handler(event, context):
+
+    agent_id = event["agent_id"]
+    agent_name = event["agent_name"]
+    agent_role = os.environ['AGENT_ROLE']
+
+    model = 'anthropic.claude-3-sonnet-20240229-v1:0'
+
+    instruction = event['prompt']
+
+    bedrock_agent = boto3.client('bedrock-agent')
+
+    logger.info("Updating Agent")
+    try:
+        update_resp = bedrock_agent.update_agent(
+            agentId=agent_id,
+            agentName=agent_name,
+            agentResourceRoleArn=agent_role,
+            foundationModel=model,
+            instruction=instruction,
+        )
+        logger.info(f"Update agent response: {update_resp}")
+    except Exception as e:
+        logger.error(f"Error updating agent: {e}")
+        raise
+
+    # An updated agent must be prepared before a new version and alias can be created
+    logger.info("Preparing Agent")
+    try:
+        prep_resp = bedrock_agent.prepare_agent(agentId=agent_id)
+        logger.info(f"Preparing Agent response: {prep_resp}")
+    except Exception as e:
+        logger.error(f"Error preparing agent: {e}")
+        raise
+
+    return {
+        'statusCode': 200,
+        'agentid': agent_id
+    }
+
diff --git
a/stepfunctions/stepfunctions/layer/__init__.py b/stepfunctions/stepfunctions/layer/__init__.py new file mode 100644 index 0000000..1f229c9 --- /dev/null +++ b/stepfunctions/stepfunctions/layer/__init__.py @@ -0,0 +1 @@ +from .layer import Layer \ No newline at end of file diff --git a/stepfunctions/stepfunctions/layer/layer.py b/stepfunctions/stepfunctions/layer/layer.py new file mode 100644 index 0000000..a706586 --- /dev/null +++ b/stepfunctions/stepfunctions/layer/layer.py @@ -0,0 +1,60 @@ +import aws_cdk as cdk +import os +from constructs import Construct +from aws_cdk import ( + aws_s3_assets as assets, + aws_lambda as lambda_, +) + + +class Layer(Construct): + layer_version: lambda_.ILayerVersion + layer_version_arn: str + + def __init__( + self, + scope: Construct, + construct_id: str, + architecture: lambda_.Architecture, + runtime: lambda_.Runtime, + path: str, + **kwargs, + ) -> None: + super().__init__(scope, construct_id, **kwargs) + + print(f"architecture {architecture}") + print(f"runtime {runtime}") + + default_platform_flag = os.environ.get("DOCKER_DEFAULT_PLATFORM") + print(f"DOCKER_DEFAULT_PLATFORM={default_platform_flag}") + + layer_assets = assets.Asset( + self, + "LayerAsset", + path=path, + bundling=cdk.BundlingOptions( + image=runtime.bundling_image, + platform=architecture.docker_platform, + output_type=cdk.BundlingOutput.AUTO_DISCOVER, + security_opt="no-new-privileges:true", # https://docs.docker.com/engine/reference/commandline/run/#optional-security-options---security-opt + network="host", + command=[ + "bash", + "-c", + "pip install -r requirements.txt -t /asset-output/python && cp -au . /asset-output/python", + ], + ), + ) + + layer = lambda_.LayerVersion( + self, + "Layer", + removal_policy=cdk.RemovalPolicy.DESTROY, + code=lambda_.Code.from_bucket( + bucket=layer_assets.bucket, key=layer_assets.s3_object_key + ), + compatible_architectures=[architecture], + ) + + self.layer_version = layer + self.layer_version_arn = layer.layer_version_arn diff --git a/stepfunctions/stepfunctions/stepfunctions_stack.py b/stepfunctions/stepfunctions/stepfunctions_stack.py new file mode 100644 index 0000000..934b3e0 --- /dev/null +++ b/stepfunctions/stepfunctions/stepfunctions_stack.py @@ -0,0 +1,440 @@ +import os +import pathlib + +import aws_cdk as cdk + +from aws_cdk import ( + Duration, + Stack, + aws_lambda as _lambda, + aws_stepfunctions_tasks as tasks, + aws_events as events, + aws_events_targets as targets, + aws_stepfunctions as sfn, + aws_iam as iam +) +from constructs import Construct +from .layer import Layer +architecture = _lambda.Architecture.X86_64 +runtime = _lambda.Runtime.PYTHON_3_12 + +class StepfunctionsStack(Stack): + + def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + + evaluation_bucket = cdk.aws_s3.Bucket( + self, + "EvaluationBucket", + event_bridge_enabled=True + ) + + powertools_layer = Layer( + self, + "PowertoolsLayer", + architecture=architecture, + runtime=runtime, + path=os.path.join( + pathlib.Path(__file__).parent.resolve().parent, + "layers", + "aws-lambda-powertools" + ) + ) + + agenteval_layer = Layer( + self, + "AgentEvalLayer", + architecture=architecture, + runtime=runtime, + path=os.path.join( + pathlib.Path(__file__).parent.resolve().parent, + "layers", + "agent-evaluation" + ) + ) + + generate_map_function = _lambda.Function( + self, + "GenerateMapFunction", + runtime=runtime, + architecture=architecture, + timeout=cdk.Duration.minutes(5), + 
handler="index.handler", + code=_lambda.Code.from_asset( + os.path.join( + pathlib.Path(__file__).resolve().parent, + "functions", + "generate_map", + ) + ), + layers=[powertools_layer.layer_version] + ) + + generate_map_step = tasks.LambdaInvoke( + self, + "Generate Map State", + lambda_function = generate_map_function, + payload=sfn.TaskInput.from_json_path_at("$"), + output_path=sfn.JsonPath.string_at("$.Payload") + ) + + + get_status_function_1 = _lambda.Function( + self, + "GetStatusFunction", + runtime=runtime, + architecture=architecture, + timeout=cdk.Duration.minutes(5), + handler="index.handler", + code=_lambda.Code.from_asset( + os.path.join( + pathlib.Path(__file__).resolve().parent, + "functions", + "check_agent_status_1", + ) + ), + layers=[powertools_layer.layer_version] + ) + + get_status_step_1 = tasks.LambdaInvoke( + self, + "Get Status 1", + lambda_function=get_status_function_1, + payload=sfn.TaskInput.from_json_path_at("$"), + result_selector = { + "agentid": sfn.JsonPath.string_at("$.Payload.agent_id"), + "agentstatus": sfn.JsonPath.string_at("$.Payload.agent_status"), + "full_payload": sfn.JsonPath.string_at("$")}, + result_path = sfn.JsonPath.string_at("$.status_output_1") + ) + + + agent_role = iam.Role( + self, + "AgentRole", + assumed_by=iam.ServicePrincipal("bedrock.amazonaws.com"), + managed_policies=[ + iam.ManagedPolicy.from_aws_managed_policy_name("AmazonBedrockFullAccess") + ]) + + update_agent_function = _lambda.Function( + self, + "UpdateAgentFunction", + runtime=runtime, + architecture=architecture, + timeout=cdk.Duration.minutes(5), + handler="index.handler", + code=_lambda.Code.from_asset( + os.path.join( + pathlib.Path(__file__).resolve().parent, + "functions", + "update_bedrock_agent", + ) + ), + layers=[powertools_layer.layer_version] + ) + + + + update_agent_function.add_environment("AGENT_ROLE",agent_role.role_arn) + + update_agent_step = tasks.LambdaInvoke( + self, + "Update Agent", + lambda_function = update_agent_function, + payload=sfn.TaskInput.from_json_path_at("$"), + result_path = "$.update_output", + result_selector = { + "agentid": sfn.JsonPath.string_at("$.Payload.agentid") + } + ) + + first_choice = sfn.Choice(self, "UpdateChoice1") + + condition1 = sfn.Condition.or_( + sfn.Condition.string_equals("$.status_output_1.agentstatus", "UPDATING"), + sfn.Condition.string_equals("$.status_output_1.agentstatus", "VERSIONING") + ) + + wait_step= sfn.Wait( + self, + "Wait1", + time=sfn.WaitTime.duration(Duration.seconds(30)) + ) + + + create_alias_function = _lambda.Function( + self, + "CreateAliasFunction", + runtime=runtime, + architecture=architecture, + timeout=cdk.Duration.minutes(5), + handler="index.handler", + code=_lambda.Code.from_asset( + os.path.join( + pathlib.Path(__file__).resolve().parent, + "functions", + "create_alias", + ) + ), + layers=[powertools_layer.layer_version] + ) + + create_alias_step = tasks.LambdaInvoke( + self, + "Create Alias", + lambda_function = create_alias_function, + payload=sfn.TaskInput.from_json_path_at("$"), + output_path = sfn.JsonPath.string_at("$.Payload"), + + ) + + get_status_function_2 = _lambda.Function( + self, + "GetStatusFunction2", + runtime=runtime, + architecture=architecture, + timeout=cdk.Duration.minutes(5), + handler="index.handler", + code=_lambda.Code.from_asset( + os.path.join( + pathlib.Path(__file__).resolve().parent, + "functions", + "check_agent_status_2", + ) + ), + layers=[powertools_layer.layer_version] + ) + + get_status_step_2 = tasks.LambdaInvoke( + self, + "Get Status 2", + 
lambda_function=get_status_function_2, + payload=sfn.TaskInput.from_json_path_at("$"), + result_selector = { + "agentid": sfn.JsonPath.string_at("$.Payload.agent_id"), + "agentstatus": sfn.JsonPath.string_at("$.Payload.agent_status"), + "full_payload": sfn.JsonPath.string_at("$")}, + result_path = "$.status_output_2") + + + + second_choice = sfn.Choice(self, "UpdateChoice2") + condition2 = sfn.Condition.not_( + sfn.Condition.string_equals("$.status_output_2.agentstatus", "PREPARED"), + ) + wait_step_2= sfn.Wait( + self, + "Wait2", + time=sfn.WaitTime.duration(Duration.seconds(30)) + ) + + + agent_alias_map = sfn.Map( + self, + "Agent Alias Map", + max_concurrency=1, + items_path = sfn.JsonPath.string_at("$.body"), + parameters={ + "agent_id": sfn.JsonPath.string_at("$.agent_id"), + "agent_name": sfn.JsonPath.string_at("$.agent_name"), + "prompt": sfn.JsonPath.string_at("$$.Map.Item.Value.prompt"), + "scenarios": sfn.JsonPath.string_at("$$.Map.Item.Value.scenarios") + } + #you can only update an agent one at a time + # + ) + + pass_step = sfn.Pass(self, + "Pass State" + ) + + run_test_function = _lambda.Function( + self, + "RunTestFunction", + runtime=runtime, + architecture=architecture, + timeout=cdk.Duration.minutes(5), + handler="index.handler", + code=_lambda.Code.from_asset( + os.path.join( + pathlib.Path(__file__).resolve().parent, + "functions", + "run_test", + ) + ), + layers=[agenteval_layer.layer_version], + environment={ + "EVALUATION_BUCKET": evaluation_bucket.bucket_name, + }, + ) + run_test_step = tasks.LambdaInvoke( + self, + "Run Test", + lambda_function=run_test_function, + payload=sfn.TaskInput.from_json_path_at("$"), + result_path="$.run_test" + ) + + error_pass = sfn.Pass(self, "handle failure") + + run_test_step.add_catch(error_pass, + result_path="$.error") + test_map= sfn.Map( + self, + "Evaluation Map", + items_path = sfn.JsonPath.string_at("$.scenarios"), + parameters={ + "prompt": sfn.JsonPath.string_at("$.prompt"), + "agent_id": sfn.JsonPath.string_at("$.agent_id"), + "agent_alias_id": sfn.JsonPath.string_at("$.agent_alias_id"), + "agent_alias_name": sfn.JsonPath.string_at("$.agent_alias_name"), + "scenario": sfn.JsonPath.string_at("$$.Map.Item.Value") + }, + result_path="$.map_output" + ) + delete_alias_function = _lambda.Function( + self, + "DeleteAliasFunction", + runtime=runtime, + architecture=architecture, + timeout=cdk.Duration.minutes(5), + handler="index.handler", + code=_lambda.Code.from_asset( + os.path.join( + pathlib.Path(__file__).resolve().parent, + "functions", + "delete_alias", + ) + ), + layers=[powertools_layer.layer_version] + ) + + delete_alias_function.add_to_role_policy( + iam.PolicyStatement( + actions=["bedrock:*"], + resources=["*"], + ) + ) + + delete_alias_step = tasks.LambdaInvoke( + self, + "Delete Alias", + lambda_function = delete_alias_function, + payload=sfn.TaskInput.from_json_path_at("$"), + result_path = "$.Payload", + + ) + + map_definition_2= run_test_step.next(pass_step) + + test_map.iterator(map_definition_2) + + + # eval_function_timeout_minutes = 10 + map_definition = get_status_step_1.next( + first_choice.when(condition1, wait_step.next(get_status_step_1)).otherwise(update_agent_step + .next( + get_status_step_2.next( + second_choice.when(condition2,wait_step_2.next(get_status_step_2)).otherwise(create_alias_step.next( + test_map + ).next( + delete_alias_step + ) + ) + ) + ) + ) + ) + + + + agent_alias_map.iterator(map_definition) + + + + chain = generate_map_step.next(agent_alias_map) + + evaluator_state_machine = 
sfn.StateMachine( + self, + "EvaluatorState", + definition_body = sfn.DefinitionBody.from_chainable(chain)) + + + + evaluator_state_machine.role.attach_inline_policy( + iam.Policy( + self, + "BedrockPolicy", + statements=[ + iam.PolicyStatement( + actions=["bedrock:*"], + resources=["*"], + ) + ], + ) + ) + + + + on_put_rule = events.Rule( + self, + "InvokeState", + event_pattern=events.EventPattern( + source=["aws.s3"], + detail_type=[ + "Object Created" + ], + detail={ + "bucket": { + "name": [evaluation_bucket.bucket_name] + }, + "object": { + "key": [{"prefix": "evaluation_prompts"}]}, + }, + ), + ) + + on_put_rule.add_target(targets.SfnStateMachine(evaluator_state_machine)) + + get_status_function_1.add_to_role_policy( + iam.PolicyStatement( + actions=["bedrock:*"], + resources=["*"], + ) + ) + get_status_function_2.add_to_role_policy( + iam.PolicyStatement( + actions=["bedrock:*"], + resources=["*"], + ) + ) + + create_alias_function.add_to_role_policy( + iam.PolicyStatement( + actions=["bedrock:*"], + resources=["*"], + ) + ) + + generate_map_function.add_to_role_policy( + iam.PolicyStatement( + actions=["s3:*"], + resources=["*"], + ) + ) + + run_test_function.add_to_role_policy( + iam.PolicyStatement( + actions=["s3:*","bedrock:*"], + resources=["*"], + ) + ) + + update_agent_function.add_to_role_policy( + iam.PolicyStatement( + actions=["bedrock:*","iam:PassRole","iam:ListRoles"], + resources=["*"], + ) + ) \ No newline at end of file diff --git a/stepfunctions/tests/__init__.py b/stepfunctions/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/stepfunctions/tests/unit/__init__.py b/stepfunctions/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/stepfunctions/tests/unit/test_stepfunctions_stack.py b/stepfunctions/tests/unit/test_stepfunctions_stack.py new file mode 100644 index 0000000..ce36343 --- /dev/null +++ b/stepfunctions/tests/unit/test_stepfunctions_stack.py @@ -0,0 +1,6 @@ +import aws_cdk as core +import aws_cdk.assertions as assertions + +from stepfunctions.stepfunctions_stack import StepfunctionsStack + +