diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..62c8935
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea/
\ No newline at end of file
diff --git a/cdk/opensearch-website-search/README.md b/cdk/opensearch-website-search/README.md
new file mode 100644
index 0000000..e523488
--- /dev/null
+++ b/cdk/opensearch-website-search/README.md
@@ -0,0 +1,128 @@
+## CDK for deploying website search clusters
+
+This project deploys the following stacks:
+1. Network stack: Sets up networking resources such as the VPC, subnets, AZs, and security group.
+2. Infrastructure stack: Sets up EC2 instances (installs ODFE 1.13.2 by default using userdata), CloudWatch logging, and a network load balancer. Check your cluster logs in the log group created by this stack in CloudWatch.
+3. APIGatewayLambda stack: Sets up API Gateway with various endpoints and a search Lambda function that points to the network load balancer created in the Infrastructure stack.
+4. Monitoring stack: Creates an AWS Lambda function that periodically monitors the backend OpenSearch cluster and sends metrics to CloudWatch.
+5. Bastion stack: Creates a group of EC2 instances inside an AutoScaling group spread across AZs that act as SSH bastion hosts and can be accessed from the restricted IP ranges defined in the stack.
+
+### CDK Installation
+
+[Install CDK](https://docs.aws.amazon.com/cdk/latest/guide/cli.html) using `npm install -g aws-cdk`
+
+
+The `cdk.json` file tells the CDK Toolkit how to execute your app.
+
+This project is set up like a standard Python project. The initialization
+process also creates a virtualenv within this project, stored under the `.venv`
+directory. To create the virtualenv it assumes that there is a `python3`
+(or `python` for Windows) executable in your path with access to the `venv`
+package. If for any reason the automatic creation of the virtualenv fails,
+you can create the virtualenv manually.
+
+To manually create a virtualenv on MacOS and Linux:
+
+```
+$ python3 -m venv .venv
+```
+
+After the init process completes and the virtualenv is created, you can use the following
+step to activate your virtualenv.
+
+```
+$ source .venv/bin/activate
+```
+
+If you are on a Windows platform, activate the virtualenv like this:
+
+```
+% .venv\Scripts\activate.bat
+```
+
+Once the virtualenv is activated, you can install the required dependencies.
+
+```
+$ pip install -r requirements.txt
+```
+
+At this point you can synthesize the CloudFormation template for this code.
+
+```
+$ cdk synth
+```
+
+To add additional dependencies, for example other CDK libraries, just add
+them to your `setup.py` file and rerun the `pip install -r requirements.txt`
+command.
+
+## Prerequisites
+1. Python 3 is required to run the CDK.
+2. AWS credentials configured locally, or you can pass them during deployment in the app.py file. [More information](https://docs.aws.amazon.com/cdk/latest/guide/environments.html)
+3. An EC2 keypair in the deploying region, to be passed as a context variable. Make sure to store the private key safely, for example in AWS Secrets Manager.
+4. Usernames and passwords for the search and monitoring Lambda functions. These should be created in AWS Secrets Manager and passed as environment variables or as context variables on the command line. Avoid persisting them in `cdk.context.json`.
+```
+SEARCH_USER
+SEARCH_PASS
+MONITORING_USER
+MONITORING_PASS
+```
+While OpenSearch is bootstrapped on the EC2 nodes, these usernames and passwords are fetched from AWS Secrets Manager and the respective users are created along with roles and role mappings.
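+
+As a reference, these secrets can be created up front with a small boto3 script. This is only a sketch: the secret names and JSON format below are placeholders, not names the stacks require, so use whatever names and format your bootstrap scripts expect.
+
+```
+import json
+
+import boto3
+
+
+def create_credential_secret(name, username, password):
+    """Create a Secrets Manager secret holding a username/password pair."""
+    client = boto3.client("secretsmanager")
+    response = client.create_secret(
+        Name=name,
+        SecretString=json.dumps({"username": username, "password": password}),
+    )
+    return response["ARN"]
+
+
+create_credential_secret("website-search/search-user", "your-search-user", "your-search-password")
+create_credential_secret("website-search/monitoring-user", "your-monitoring-user", "your-monitoring-password")
+```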
+
+
+## Cluster deploy
+The CDK currently only supports the TAR distribution, so passing any other value for `distribution` will result in an error.
+You can check the `cdk.context.json` file for the default context variables; consider them parameters. Enter the appropriate values for `keypair`, `url` and `dashboards_url`.
+Any of the context variables can be overridden using the `-c` or `--context` flag in the deploy command.
+
+To deploy the stacks, follow these steps:
+1. Activate the Python virtual environment.
+2. Enter the required values in the `cdk.context.json` file:
+   - cidr: CIDR to create the VPC with (defaults to 10.9.0.0/21).
+   - distribution: currently only the `tar` distribution is supported.
+   - keypair: your EC2 keypair in the deploying region. Please check that the key exists and that you are deploying in the same region.
+   - url: OpenSearch download URL, e.g. https://artifacts.opensearch.org/snapshots/bundle/opensearch/1.0.0-rc1/opensearch-1.0.0-rc1-linux-x64.tar.gz
+   - dashboards_url: OpenSearch Dashboards download URL, e.g. https://artifacts.opensearch.org/snapshots/bundle/opensearch-dashboards/1.0.0-rc1/opensearch-dashboards-1.0.0-rc1-linux-x64.tar.gz
+
+Please check that the URLs are valid, as invalid ones will not throw an explicit error. These links are used in the userdata of the EC2 instances.
+
+3. If you have all values entered in cdk.context.json:
+   ```
+   cdk deploy --all
+   ```
+   If you want to enter the parameters via command line:
+   ```
+   cdk deploy --all -c keypair=your_ec2_keyPair -c url= -c dashboards_url=
+   ```
+   For a non-interactive shell:
+   ```
+   cdk deploy --all --require-approval=never
+   ```
+
+### SSH
+Both data nodes and master nodes are reached over SSH via the bastion hosts. Use the [ssh-bastion-ec2](tools/ssh-bastion-ec2/ssh-bastion-ec2) tool.
+
+
+## Teardown
+To delete a particular stack, use:
+```
+cdk destroy <stack-name>
+```
+
+To delete all the created stacks together, use:
+```
+cdk destroy --all
+```
+_Note: If you deployed the stacks using command line parameters (i.e. `cdk.context.json` has empty values), you need to pass the same parameters during `cdk destroy` as well._
+```
+cdk destroy --all -c keypair=your_ec2_keyPair -c url= -c dashboards_url=
+```
+## Useful commands
+
+ * `cdk ls`      list all stacks in the app
+ * `cdk synth`   emits the synthesized CloudFormation template
+ * `cdk deploy`  deploy this stack to your default AWS account/region
+ * `cdk diff`    compare deployed stack with current state
+ * `cdk docs`    open CDK documentation
+
+
diff --git a/cdk/opensearch-website-search/app.py b/cdk/opensearch-website-search/app.py
new file mode 100644
index 0000000..08f0aae
--- /dev/null
+++ b/cdk/opensearch-website-search/app.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+import os
+
+# For consistency with TypeScript code, `cdk` is the preferred import name for
+# the CDK's core module. The following line also imports it as `core` for use
+# with examples from the CDK Developer's Guide, which are in the process of
+# being updated to use `cdk`. You may delete this import if you don't need it.
+from aws_cdk import core, aws_ec2 as ec2 + +from website_search_cdk.network import Network +from website_search_cdk.infra import ClusterStack +from website_search_cdk.infra import Architecture, Security +from website_search_cdk.api_lambda import ApiLambdaStack +from website_search_cdk.monitoring import MonitoringStack +from website_search_cdk.alarms import AlarmsStack +from website_search_cdk.bastions import Bastions + +env = core.Environment(account=os.environ.get("CDK_DEPLOY_ACCOUNT", os.environ["CDK_DEFAULT_ACCOUNT"]), + region=os.environ.get("CDK_DEPLOY_REGION", os.environ["CDK_DEFAULT_REGION"])) +app = core.App() + +stack_prefix = app.node.try_get_context("stack_prefix") +if not stack_prefix: + raise ValueError(stack_prefix, "is either null or empty. Please use a prefix to differentiate" + " between stack and prevent from overriding other stacks") + +architecture = app.node.try_get_context("architecture") +if not (Architecture.has_value(architecture)): + raise ValueError(architecture, "is either null or not supported yet! Please use either x64 or arm64") + +security = app.node.try_get_context("security") +if not (Security.has_security_value(security)): + raise ValueError(security, "The keyword has to be either of these two: enable or disable.") + +cluster_stack_name = app.node.try_get_context("cluster_stack_name") +network_stack_name = app.node.try_get_context("network_stack_name") +search_access_stack_name = app.node.try_get_context("search_access_stack_name") +monitoring_stack_name = app.node.try_get_context("monitoring_stack_name") + +if not cluster_stack_name: + raise ValueError(" Cluster stack name cannot be None. Please provide the right stack name") +if not network_stack_name: + raise ValueError(" Network stack name cannot be None. Please provide the right stack name") + +# Default AMI points to latest AL2 +al2_ami = ec2.MachineImage.latest_amazon_linux(generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, + cpu_type=ec2.AmazonLinuxCpuType.X86_64) + +network = Network(app, stack_prefix + network_stack_name, + # If you don't specify 'env', this stack will be environment-agnostic. + # Account/Region-dependent features and context lookups will not work, + # but a single synthesized template can be deployed anywhere. + + # Uncomment the next line to specialize this stack for the AWS Account + # and Region that are implied by the current CLI configuration. + + # env=core.Environment(account=os.getenv('CDK_DEFAULT_ACCOUNT'), + # region=os.getenv('CDK_DEFAULT_REGION')), + + # Uncomment the next line if you know exactly what Account and Region you + # want to deploy the stack to. 
*/ + + # env=core.Environment(account='123456789012', region='us-east-1'), + + # For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html + env=env, + ) +opensearh_infra = ClusterStack(app, stack_prefix + cluster_stack_name, vpc=network.vpc, sg=network.security_group, + architecture=architecture, security=security, env=env + ) + +api_lambda = ApiLambdaStack(app, stack_prefix + search_access_stack_name, network.vpc, opensearh_infra.nlb, + opensearh_infra.opensearch_listener, env=env) +monitoring = MonitoringStack(app, stack_prefix + monitoring_stack_name, network.vpc, opensearh_infra.nlb, env=env) +alarms = AlarmsStack(app, stack_prefix + "alarms", env=env) +bastion_host_infra = Bastions(app, stack_prefix + 'bastion-hosts', network.vpc, env=env) + +app.synth() diff --git a/cdk/opensearch-website-search/cdk.context.json b/cdk/opensearch-website-search/cdk.context.json new file mode 100644 index 0000000..2610ed8 --- /dev/null +++ b/cdk/opensearch-website-search/cdk.context.json @@ -0,0 +1,27 @@ +{ + "stack_prefix": "test-one-", + "cluster_stack_name": "opensearch-cluster", + "network_stack_name": "network", + "search_access_stack_name": "gateway-lambda", + "monitoring_stack_name": "monitoring", + "cidr": "10.9.0.0/21", + "distribution": "tar", + "keypair": "", + "architecture": "x64", + "ami_id": "", + "url": "https://artifacts.opensearch.org/releases/bundle/opensearch/1.0.0/opensearch-1.0.0-linux-x64.tar.gz", + "dashboards_url": "https://artifacts.opensearch.org/releases/bundle/opensearch-dashboards/1.0.0/opensearch-dashboards-1.0.0-linux-x64.tar.gz", + "master_node_count": "2", + "data_node_count": "3", + "client_node_count": "3", + "nlb_opensearch_port": "80", + "nlb_dashboards_port": "5601", + "security": "enable", + "search_user": "", + "search_pass": "", + "monitoring_user": "", + "monitoring_pass": "", + "allowed_origins": [ + "*" + ] +} diff --git a/cdk/opensearch-website-search/cdk.json b/cdk/opensearch-website-search/cdk.json new file mode 100644 index 0000000..0ef87a9 --- /dev/null +++ b/cdk/opensearch-website-search/cdk.json @@ -0,0 +1,13 @@ +{ + "app": "python3 app.py", + "context": { + "@aws-cdk/core:enableStackNameDuplicates": "true", + "aws-cdk:enableDiffNoFail": "true", + "@aws-cdk/core:stackRelativeExports": "true", + "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true, + "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true, + "@aws-cdk/aws-kms:defaultKeyPolicies": true, + "@aws-cdk/aws-s3:grantWriteWithoutAcl": true, + "@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true + } +} diff --git a/cdk/opensearch-website-search/lambdas/monitoring-lambda/__init__.py b/cdk/opensearch-website-search/lambdas/monitoring-lambda/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cdk/opensearch-website-search/lambdas/monitoring-lambda/monitor.py b/cdk/opensearch-website-search/lambdas/monitoring-lambda/monitor.py new file mode 100644 index 0000000..9450681 --- /dev/null +++ b/cdk/opensearch-website-search/lambdas/monitoring-lambda/monitor.py @@ -0,0 +1,222 @@ +import json +import boto3 +import logging +import requests +import os +from enum import Enum +from requests.auth import HTTPBasicAuth + +# TODO: Adds redundant request ID into logs, fix logging +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +username = os.getenv('MONITORING_USER') +password = os.getenv('MONITORING_PASS') +nlb_endpoint = os.getenv('NLB_ENDPOINT') +nlb_opensearch_port = os.getenv('NLB_OPENSEARCH_PORT', "80") +nlb_dashboards_port = 
os.getenv('NLB_DASHBOARD_PORT', "5601") +opensearch_base_url = 'http://' + nlb_endpoint + ':' + nlb_opensearch_port +dashboards_base_url = 'http://' + nlb_endpoint + ':' + nlb_dashboards_port + +http_basic_auth = HTTPBasicAuth(username, password) + + +""" +MetricData=[ + { + 'MetricName': 'string', + 'Dimensions': [ + { + 'Name': 'string', + 'Value': 'string' + }, + ], + 'Timestamp': datetime(2015, 1, 1), + 'Value': 123.0, + 'StatisticValues': { + 'SampleCount': 123.0, + 'Sum': 123.0, + 'Minimum': 123.0, + 'Maximum': 123.0 + }, + 'Values': [ + 123.0, + ], + 'Counts': [ + 123.0, + ], + 'Unit': 'Seconds'|'Microseconds'|'Milliseconds'|'Bytes'|'Kilobytes'|'Megabytes'|'Gigabytes'|'Terabytes'|'Bits'|'Kilobits'|'Megabits'|'Gigabits'|'Terabits'|'Percent'|'Count'|'Bytes/Second'|'Kilobytes/Second'|'Megabytes/Second'|'Gigabytes/Second'|'Terabytes/Second'|'Bits/Second'|'Kilobits/Second'|'Megabits/Second'|'Gigabits/Second'|'Terabits/Second'|'Count/Second'|'None', + 'StorageResolution': 123 + }, +] +""" + + +class OpenSearchHealthStatus(Enum): + RED = 1 + YELLOW = 2 + GREEN = 3 + + +class ShardCountTypes(Enum): + ACTIVE_PRIMARY_SHARDS = 1 + ACTIVE_SHARDS = 2 + DELAYED_UNASSIGNED_SHARDS = 3 + INITIALIZING_SHARDS = 4 + RELOCATING_SHARDS = 5 + UNASSIGNED_SHARDS = 6 + + +def to_upper_camel_case(snake_str): + components = snake_str.split('_') + return components[0] + ''.join(x.title() for x in components[1:]) + + +class CWMetricData(): + + def __init__(self, namespace): + self.metric_data = [] + self.namespace = namespace + + def add(self, metric): + self.metric_data.append(metric) + + def get_all_metrics(self): + return self.metric_data + + def get_namespace(self): + return self.namespace + + +def check_cluster_health(metric_data): + cluster_health_url = opensearch_base_url + '/_cluster/health?pretty' + headers = {"Content-Type": "application/json"} + + STATUS_PREFIX = 'ClusterStatus.' + SHARD_PREFIX = 'Shards.' 
+ + try: + res = requests.get(cluster_health_url, auth=http_basic_auth, headers=headers, verify=False) + health = res.json() + + # check master available metric + if "discovered_master" in health and health["discovered_master"]: + metric_data.add({ + 'MetricName': 'MasterReachableFromNLB', + 'Value': 1.0, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + else: + metric_data.add({ + 'MetricName': 'MasterReachableFromNLB', + 'Value': 0.0, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + logger.info("Successfully collected master metrics") + + # cluster health status metric + if "status" in health: + for status in OpenSearchHealthStatus: + colored_status = status.name.lower() + if health['status'] == colored_status: + metric_data.add({ + 'MetricName': STATUS_PREFIX + colored_status, + 'Value': 1.0, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + else: + metric_data.add({ + 'MetricName': STATUS_PREFIX + colored_status, + 'Value': 0.0, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + logger.info("Successfully collected cluster status metrics") + + # Add node count metric + if "number_of_nodes" in health: + metric_data.add({ + 'MetricName': 'Nodes', + 'Value': health["number_of_nodes"], + 'Unit': 'Count', + 'StorageResolution': 60 + }) + logger.info("Successfully collected node count metric") + + # Add shard count metric + for shard_count_type in ShardCountTypes: + shard_type = shard_count_type.name.lower() + if shard_type in health: + metric_data.add({ + 'MetricName': SHARD_PREFIX + to_upper_camel_case(shard_type.removeprefix("_shards")), + 'Value': health[shard_type], + 'Unit': 'Count', + 'StorageResolution': 60 + }) + logger.info("Successfully collected shard count metrics") + + # Add pending task metric + if "number_of_pending_tasks" in health: + metric_data.add({ + 'MetricName': 'PendingTasks', + 'Value': health["number_of_pending_tasks"], + 'Unit': 'Count', + 'StorageResolution': 60 + }) + logger.info("Successfully collected pending_tasks metrics") + + metric_data.add({ + 'MetricName': 'ClusterHealth.Failed', + 'Value': 0.0, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + logger.info("All check_cluster_health metrics collected") + + except Exception as e: + logger.exception(e) + metric_data.add({ + 'MetricName': 'ClusterHealth.Failed', + 'Value': 1.0, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + + +def check_dashboards_health(metric_data): + dashboards_status_url = dashboards_base_url + '/api/status' + val = 0.0 + try: + res = requests.get(dashboards_status_url).json() + + if res['status']['overall']['state'] == 'green': + val = 1.0 + + metric_data.add({ + 'MetricName': 'OpenSearchDashboardsHealthyNodes', + 'Value': val, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + + logger.info("All check_dashboards_health metrics collected") + except Exception as e: + logger.exception(e) + metric_data.add({ + 'MetricName': 'OpenSearchDashboardsHealthyNodes', + 'Value': 0.0, + 'Unit': 'Count', + 'StorageResolution': 60 + }) + + +def handler(event, context): + cw = boto3.client("cloudwatch") + metrics = CWMetricData("opensearch-website-search") + check_cluster_health(metrics) + check_dashboards_health(metrics) + cw.put_metric_data(Namespace=metrics.get_namespace(), + MetricData=metrics.get_all_metrics()) diff --git a/cdk/opensearch-website-search/lambdas/monitoring-lambda/requirements.txt b/cdk/opensearch-website-search/lambdas/monitoring-lambda/requirements.txt new file mode 100644 index 0000000..138d722 --- /dev/null +++ 
b/cdk/opensearch-website-search/lambdas/monitoring-lambda/requirements.txt @@ -0,0 +1,2 @@ +requests==2.26.0 + diff --git a/cdk/opensearch-website-search/lambdas/search-lambda/__init__.py b/cdk/opensearch-website-search/lambdas/search-lambda/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cdk/opensearch-website-search/lambdas/search-lambda/doc-search.py b/cdk/opensearch-website-search/lambdas/search-lambda/doc-search.py new file mode 100644 index 0000000..54dda2f --- /dev/null +++ b/cdk/opensearch-website-search/lambdas/search-lambda/doc-search.py @@ -0,0 +1,173 @@ +import json +import requests +import os +import re +from requests.auth import HTTPBasicAuth + +username = os.getenv('SEARCH_USER') +password = os.getenv('SEARCH_PASS') +nlb_opensearch_port = os.getenv('NLB_OPENSEARCH_PORT', "80") +host = 'http://' + os.getenv('NLB_ENDPOINT') + ':' + nlb_opensearch_port +index = 'docs' + +http_basic_auth = HTTPBasicAuth(username, password) + + +def handler(event, context): + url_param_str = 'queryStringParameters' + query_key = 'q' + doc_version_key = 'v' + response = { + "statusCode": 400, + "headers": { + "Access-Control-Allow-Origin": '*', + "Content-Type": "application/json" + }, + "isBase64Encoded": False, + "body": "{}"} + + if ( + url_param_str not in event or + 'q' not in event[url_param_str] or + event[url_param_str][query_key].strip() == '' + ): + response["body"] = json.dumps({ + "error": { + "code": "MISSING_QUERY", + "message": "No search query provided" + } + }) + return response + + # Input sanitization, courtesy https://github.com/AMoo-Miki + # ToDo: Relax this to allow AND/OR/NOT/PRECEDENCE for `simple_query_string` + q = re.sub(r"([\\+=\-!(){}[\]^~*?:/'\"<>| ]|&{2,}|\b(AND|OR|NOT)\b)+", " ", event[url_param_str][query_key]).strip() + + doc_version = re.sub(r"[^\d.]+", "", event[url_param_str][doc_version_key]).strip() if event[url_param_str].get( + doc_version_key) else os.getenv('DOCS_LATEST') + + return doSearch(q, doc_version, response) + + +def doSearch(q, doc_version, response): + search_url = host + '/' + index + '/_search' + + # 1. TODO: if the user and password for search is not present in ENV variable invoke a CW alarm + # 2. TODO: modify this query to improve relevance and extract right fields based on + # document schema and fields required by frontend + query = { + "query": { + "bool": { + "must": [ + { + "simple_query_string": { + "query": q, + "fields": ["title", "content"], + "analyze_wildcard": True, + "default_operator": "and" + } + }, { + "bool": { + "should": [ + { + "match": { + "version": doc_version + } + }, { + "bool": { + "must_not": [ + { + "match": { + "type": "DOCS" + } + } + ] + } + } + ] + } + } + ] + } + }, + # Overriding `boundary_chars` as "\n" to get full paragraphs is not working. Will need to figure the best match ourselves. 
+ "highlight": { + "no_match_size": 80, + "fragment_size": 250, + "pre_tags": "", + "post_tags": "", + "fields": { + "content": { + "number_of_fragments": 1 + } + } + }, + # `collection` is a string used by the website + # `ancestors` is an array used by the docsite + "_source": ["url", "version", "type", "title", "content", "ancestors", "collection"], + "size": 200 + } + + # ES > 6.x requires an explicit Content-Type header + headers = {"Content-Type": "application/json"} + r = requests.get(search_url, auth=http_basic_auth, headers=headers, data=json.dumps(query)) + + response['statusCode'] = r.status_code + result = json.loads(r.text) + + if r.status_code == 200: + output = limitResults(result['hits']['hits']) if 'hits' in result and 'hits' in result['hits'] else [] + if not output and not q.endswith('*'): + # If no results were found, make last word act as a prefix + return doSearch(q + "*", doc_version, response) + + response['body'] = json.dumps({"results": output}) + + # Look for error key when status_code != 200 + elif 'error' in result and 'caused_by' in result['error'] and result['error']['caused_by']['reason']: + response['body'] = json.dumps({ + "error": { + "code": result['error']['caused_by']['type'].upper() if result['error']['caused_by']['type'] else "ERROR", + "message": result['error']['caused_by']['reason'] + } + }) + + # If status_code != 200 and no error details found, send back unknown error + else: + response['body'] = json.dumps({"error": {"code": "UNKNOWN", "message": "Unknown error"}}) + + return response + + +def limitResults(hits): + return list(filter(bool, map(limitProperties, hits))) + + +def limitProperties(record): + if '_source' not in record: + return None + + source = record['_source'] + result = { + "url": source['url'], + "type": source['type'], + "version": source['version'], + "title": source['title'] + } + + ancestors = [] + if 'ancestors' in source and source['ancestors']: + ancestors.extend(source['ancestors']) + elif 'collection' in source and source['collection']: + ancestors.append(source['collection']) + + result['ancestors'] = ancestors + + if ( + 'highlight' in record and + 'content' in record['highlight'] and + record['highlight']['content'] + ): + result["content"] = record['highlight']['content'][0] + + return result \ No newline at end of file diff --git a/cdk/opensearch-website-search/lambdas/search-lambda/requirements.txt b/cdk/opensearch-website-search/lambdas/search-lambda/requirements.txt new file mode 100644 index 0000000..a8ed785 --- /dev/null +++ b/cdk/opensearch-website-search/lambdas/search-lambda/requirements.txt @@ -0,0 +1 @@ +requests==2.26.0 diff --git a/cdk/opensearch-website-search/requirements.txt b/cdk/opensearch-website-search/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/cdk/opensearch-website-search/requirements.txt @@ -0,0 +1 @@ +-e . 
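For local testing, the search handler above can be exercised without API Gateway. The sketch below is an assumption-laden example, not part of the stacks: it expects the handler's environment variables (`SEARCH_USER`, `SEARCH_PASS`, `NLB_ENDPOINT`) to be exported and the load balancer to be reachable, and it builds an event that mirrors the API Gateway proxy shape the code reads.

```
# Sketch: load lambdas/search-lambda/doc-search.py and invoke its handler locally.
# Assumes SEARCH_USER, SEARCH_PASS and NLB_ENDPOINT are already set in the environment.
import importlib.util

spec = importlib.util.spec_from_file_location("doc_search", "lambdas/search-lambda/doc-search.py")
doc_search = importlib.util.module_from_spec(spec)
spec.loader.exec_module(doc_search)

# API Gateway proxy-style event: `q` is the query, `v` the optional docs version.
event = {"queryStringParameters": {"q": "install opensearch", "v": "1.0"}}
response = doc_search.handler(event, None)
print(response["statusCode"])
print(response["body"])
```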
diff --git a/cdk/opensearch-website-search/setup.py b/cdk/opensearch-website-search/setup.py new file mode 100644 index 0000000..98eb9f1 --- /dev/null +++ b/cdk/opensearch-website-search/setup.py @@ -0,0 +1,57 @@ +import setuptools + + +with open("README.md") as fp: + long_description = fp.read() + + +setuptools.setup( + name="opensearch_org_search_cdk", + version="1.0.0", + + description="Infra set up for OpenSearch cluster that powers search functionality on opensearch.org using CDK", + long_description=long_description, + long_description_content_type="text/markdown", + + author="Abbas Hussain abbashus@github", + + package_dir={"": "website_search_cdk"}, + packages=setuptools.find_packages(where="website_search_cdk"), + + install_requires=[ + "aws-cdk.core", + "aws-cdk.aws-s3", + "aws-cdk.aws-ec2", + "aws-cdk.aws-cloudwatch", + "aws-cdk.aws_dynamodb", + "aws-cdk.aws-autoscaling", + "aws-cdk.aws_elasticloadbalancingv2", + "aws-cdk.aws_iam", + "aws-cdk.aws_logs", + "aws-cdk.aws_lambda", + "aws-cdk.aws_apigateway", + "aws-cdk.aws_events_targets", + "aws-cdk.aws_logs" + ], + + python_requires=">=3.6", + + classifiers=[ + "Development Status :: 4 - Beta", + + "Intended Audience :: Developers", + + "License :: OSI Approved :: Apache Software License", + + "Programming Language :: JavaScript", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + + "Topic :: Software Development :: Code Generators", + "Topic :: Utilities", + + "Typing :: Typed", + ], +) diff --git a/cdk/opensearch-website-search/source.bat b/cdk/opensearch-website-search/source.bat new file mode 100644 index 0000000..9e1a834 --- /dev/null +++ b/cdk/opensearch-website-search/source.bat @@ -0,0 +1,13 @@ +@echo off + +rem The sole purpose of this script is to make the command +rem +rem source .venv/bin/activate +rem +rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. +rem On Windows, this command just runs this batch file (the argument is ignored). +rem +rem Now we don't need to document a Windows command for activating a virtualenv. 
+ +echo Executing .venv\Scripts\activate.bat for you +.venv\Scripts\activate.bat diff --git a/cdk/opensearch-website-search/userdata/tar/dashboards.sh b/cdk/opensearch-website-search/userdata/tar/dashboards.sh new file mode 100644 index 0000000..5225b43 --- /dev/null +++ b/cdk/opensearch-website-search/userdata/tar/dashboards.sh @@ -0,0 +1,11 @@ +#Installing Opensearch dashboards +dashboards_url="${__DASHBOARDS_URL__}" +cd / +curl "${!dashboards_url}" -o opensearch-dashboards +tar zxf opensearch-dashboards +chown -R ec2-user:ec2-user opensearch-dashboards-* +cd opensearch-dashboards-* || exit +sudo sed -i /opensearch.hosts/s/https/http/1 config/opensearch_dashboards.yml +echo "server.host: 0.0.0.0" >> config/opensearch_dashboards.yml +sudo -u ec2-user touch dashboard_install.log +sudo -u ec2-user nohup ./bin/opensearch-dashboards > dashboard_install.log 2>&1 & \ No newline at end of file diff --git a/cdk/opensearch-website-search/userdata/tar/main.sh b/cdk/opensearch-website-search/userdata/tar/main.sh new file mode 100644 index 0000000..b41efe7 --- /dev/null +++ b/cdk/opensearch-website-search/userdata/tar/main.sh @@ -0,0 +1,63 @@ +#!/bin/bash +downloadUrl="${__URL__}" +stackName="${__STACK_NAME__}" +nodeName="${__NODE_NAME__}" +master="${__MASTER__}" +data="${__DATA__}" +ingest="${__INGEST__}" +logGroup="${__LG__}" + + +sudo sysctl -w vm.max_map_count=262144 +sudo yum update -y +sudo yum install -y libnss3.so xorg-x11-fonts-100dpi xorg-x11-fonts-75dpi xorg-x11-utils xorg-x11-fonts-cyrillic xorg-x11-fonts-Type1 xorg-x11-fonts-misc fontconfig freetype +curl "${!downloadUrl}" -o opensearch +tar zxf opensearch +chown -R ec2-user:ec2-user opensearch* +cd opensearch-* +# TODO: Should discovery.ec2.tag.role be same as cluster.name, used for master discovery or all nodes in a cluster discovery? 
+{ +echo " +cluster.name: ${!stackName} +cluster.initial_master_nodes: [\"seed\"] +discovery.seed_providers: ec2 +discovery.ec2.tag.role: master +network.host: 0.0.0.0 +node.name: ${!nodeName} +node.master: ${!master} +node.data: ${!data} +node.ingest: ${!ingest} +" +} >> config/opensearch.yml +uuid=$(uuidgen | cut -d - -f 1) +sudo sed -i /^node.name/s/node/"$uuid"/2 config/opensearch.yml +# Disabling HTTPS since all calls within VPC +sudo sed -i /plugins.security.ssl.http.enabled/s/true/false/1 plugins/opensearch-security/tools/install_demo_configuration.sh +sudo -u ec2-user bin/opensearch-plugin install discovery-ec2 --batch +sudo -u ec2-user bin/opensearch-plugin install repository-s3 --batch +sudo -u ec2-user touch install.log +sudo -u ec2-user nohup ./opensearch-tar-install.sh > install.log 2>&1 & +logfile=$(pwd)/logs/${!stackName}.log + +# Creating cloudwatch logging +sudo yum install amazon-cloudwatch-agent -y +cat <<- EOF > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json +{ + "logs": { + "logs_collected": { + "files": { + "collect_list": [ + { + "file_path": "$logfile", + "log_group_name": "${!logGroup}", + "log_stream_name": "${!nodeName}", + "timezone": "UTC" + } + ] + } + } + }, + "log_stream_name": "others" +} +EOF +sudo systemctl start amazon-cloudwatch-agent \ No newline at end of file diff --git a/cdk/opensearch-website-search/website_search_cdk/__init__.py b/cdk/opensearch-website-search/website_search_cdk/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cdk/opensearch-website-search/website_search_cdk/alarms.py b/cdk/opensearch-website-search/website_search_cdk/alarms.py new file mode 100644 index 0000000..d7c2d2f --- /dev/null +++ b/cdk/opensearch-website-search/website_search_cdk/alarms.py @@ -0,0 +1,76 @@ +from aws_cdk import (core as cdk, aws_cloudwatch as cloudwatch) + + +class AlarmsStack(cdk.Stack): + + def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + alarm_name_template = f'[{self.region}][website-search]{{}}' + ONE_MINUTE_DURATION = cdk.Duration.minutes(1) + NAMESPACE = 'opensearch-website-search' + + alarms = [ + { + "alarm_name": alarm_name_template.format("master-not-reachable"), + "alarm_description": "Master is not reachable for opensearch.org website search OpenSearch cluster.", + "metric_namespace": NAMESPACE, + "metric_name": "MasterReachableFromNLB", + "period": ONE_MINUTE_DURATION, + "threshold": 1, + "comparison_operator": cloudwatch.ComparisonOperator.LESS_THAN_THRESHOLD, + "evaluation_periods": 3, + "datapoints_to_alarm": 3, + "treat_missing_data": cloudwatch.TreatMissingData.BREACHING + }, + { + "alarm_name": alarm_name_template.format("red-cluster"), + "alarm_description": "One or more indices are missing primary shard.", + "metric_namespace": NAMESPACE, + "metric_name": "ClusterStatus.red", + "period": ONE_MINUTE_DURATION, + "threshold": 0, + "comparison_operator": cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, + "evaluation_periods": 3, + "datapoints_to_alarm": 3, + "treat_missing_data": cloudwatch.TreatMissingData.BREACHING + }, + { + "alarm_name": alarm_name_template.format("unassigned-shards"), + "alarm_description": "Some shards are unassigned", + "metric_namespace": NAMESPACE, + "metric_name": "Shards.unassignedShards", + "period": ONE_MINUTE_DURATION, + "threshold": 0, + "comparison_operator": cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, + "evaluation_periods": 3, + "datapoints_to_alarm": 3, + 
"treat_missing_data": cloudwatch.TreatMissingData.BREACHING + }, + { + "alarm_name": alarm_name_template.format("monitoring-failed"), + "alarm_description": "The monitoring Lambda function is not working as expected.", + "metric_namespace": NAMESPACE, + "metric_name": "ClusterHealth.Failed", + "period": ONE_MINUTE_DURATION, + "threshold": 0, + "comparison_operator": cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, + "evaluation_periods": 3, + "datapoints_to_alarm": 3, + "treat_missing_data": cloudwatch.TreatMissingData.BREACHING + } + ] + + for alarm in alarms: + cs_alarm = cloudwatch.Alarm(self, alarm.get('alarm_name') + '-alarm', + alarm_name=alarm.get('alarm_name'), + alarm_description=alarm.get('alarm_description'), + metric=cloudwatch.Metric(namespace=alarm.get('metric_namespace'), + metric_name=alarm.get('metric_name')), + threshold=alarm.get('threshold'), + comparison_operator=alarm.get('comparison_operator'), + period=alarm.get('period', ONE_MINUTE_DURATION), + evaluation_periods=alarm.get('evaluation_periods', 3), + datapoints_to_alarm=alarm.get('datapoints_to_alarm', 3), + treat_missing_data=alarm.get('treat_missing_data', cloudwatch.TreatMissingData.BREACHING) + ) diff --git a/cdk/opensearch-website-search/website_search_cdk/api_lambda.py b/cdk/opensearch-website-search/website_search_cdk/api_lambda.py new file mode 100644 index 0000000..482a3e3 --- /dev/null +++ b/cdk/opensearch-website-search/website_search_cdk/api_lambda.py @@ -0,0 +1,133 @@ +from aws_cdk import (aws_apigateway as gateway, aws_ec2 as ec2, aws_lambda, aws_logs, core as cdk) +import os + + +class ApiLambdaStack(cdk.Stack): + + def __init__(self, scope: cdk.Construct, construct_id: str, vpc, nlb, opensearch_listener, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + stack_prefix = self.node.try_get_context("stack_prefix") + allowed_origins = self.node.try_get_context("allowed_origins") + if allowed_origins is None or not isinstance(allowed_origins, list) or allowed_origins == '': + raise ValueError("Please provide a list of allowed origins for CORS support") + + nlb_opensearch_port = self.node.try_get_context("nlb_opensearch_port") or "80" + nlb_dashboards_port = self.node.try_get_context("nlb_dashboards_port") or "5601" + + search_user = self.node.try_get_context("search_user") + search_pass = self.node.try_get_context("search_pass") + + # TODO: Configure Lambda provisioned concurrency + search_lambda = aws_lambda.Function(self, 'search-lambda', + handler='doc-search.handler', + runtime=aws_lambda.Runtime.PYTHON_3_9, + code=aws_lambda.Code.asset('lambdas/search-lambda'), + vpc=vpc, + vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE), + environment={ + 'SEARCH_USER': os.getenv("SEARCH_USER", search_user), + 'SEARCH_PASS': os.getenv("SEARCH_PASS", search_pass), + 'NLB_ENDPOINT': nlb.load_balancer_dns_name, + 'NLB_OPENSEARCH_PORT': self.node.try_get_context("nlb_opensearch_port") or 80, + }) + + """ + https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-logging.html + + default access log format used + { "requestId":"$context.requestId", "ip": "$context.identity.sourceIp", + "caller":"$context.identity.caller", "user":"$context.identity.user", + "requestTime":"$context.requestTime", "httpMethod":"$context.httpMethod", + "resourcePath":"$context.resourcePath", "status":"$context.status", + "protocol":"$context.protocol", "responseLength":"$context.responseLength" } + """ + gateway_access_log_group = aws_logs.LogGroup(self, "LogGroup", + 
log_group_name=f'{stack_prefix}api-gateway/access-logs', + retention=aws_logs.RetentionDays.THREE_MONTHS) + + access_log_destination = gateway.LogGroupLogDestination(gateway_access_log_group) + + stage_options = gateway.StageOptions( + logging_level=gateway.MethodLoggingLevel.INFO, + metrics_enabled=True, + throttling_burst_limit=5000, # Default value is 5000 + throttling_rate_limit=10000, # Default value is 10000 + stage_name='prod', # Default is prod, need a better name, this becomes part of URI + access_log_destination=access_log_destination + ) + + api = gateway.RestApi(self, stack_prefix + 'opensearch-api-gateway', + rest_api_name=stack_prefix + 'opensearch-api-gateway', + description="APIs for search and managing OpenSearch cluster", + deploy_options=stage_options + ) + + search_entity = api.root.add_resource( + 'search', + default_cors_preflight_options=gateway.CorsOptions( + allow_methods=['GET', 'OPTIONS'], + allow_origins=allowed_origins + ) + ) + + search_entity_lambda_integration = gateway.LambdaIntegration(search_lambda, proxy=True) + search_entity.add_method('GET', search_entity_lambda_integration) + + # create a VPC link + vpc_link = gateway.VpcLink(self, stack_prefix + 'opensearch-vpcLink', targets=[nlb], + vpc_link_name='OpenSearchVpcLink') + + # create HTTP Proxy integration to access opensearch + opensearch_http_proxy_integration = gateway.Integration( + type=gateway.IntegrationType.HTTP_PROXY, + integration_http_method='ANY', + options=gateway.IntegrationOptions( + vpc_link=vpc_link, + cache_key_parameters=["method.request.path.proxy"], + request_parameters={ + "integration.request.path.proxy": "method.request.path.proxy" + }, + integration_responses=[gateway.IntegrationResponse( + status_code="200" + )] + ), + uri=f'http://{nlb.load_balancer_dns_name}:{nlb_opensearch_port}/' + '{proxy}' + ) + + # create HTTP Proxy integration to access opensearch dashboards + opensearch_dashboard_http_proxy_integration = gateway.Integration( + type=gateway.IntegrationType.HTTP_PROXY, + integration_http_method='ANY', + options=gateway.IntegrationOptions( + vpc_link=vpc_link, + cache_key_parameters=["method.request.path.proxy"], + request_parameters={ + "integration.request.path.proxy": "method.request.path.proxy" + }, + integration_responses=[gateway.IntegrationResponse( + status_code="200" + )] + ), + uri=f'http://{nlb.load_balancer_dns_name}:{nlb_dashboards_port}/' + '{proxy}' + ) + + opensearch_access_entity = api.root.add_resource('opensearch').add_resource('{proxy+}') + opensearch_access_entity.add_method( + http_method='ANY', + integration=opensearch_http_proxy_integration, + operation_name='ReadWriteAccessOpenSearch', + request_parameters={ + "method.request.path.proxy": True + } + ) + + opensearch_dashboard_access_entity = api.root.add_resource('opensearch-dashboards').add_resource('{proxy+}') + opensearch_dashboard_access_entity.add_method( + http_method='ANY', + integration=opensearch_dashboard_http_proxy_integration, + operation_name='AccessOpenSearchDashboards', + request_parameters={ + "method.request.path.proxy": True + } + ) diff --git a/cdk/opensearch-website-search/website_search_cdk/bastions.py b/cdk/opensearch-website-search/website_search_cdk/bastions.py new file mode 100644 index 0000000..1fe3e23 --- /dev/null +++ b/cdk/opensearch-website-search/website_search_cdk/bastions.py @@ -0,0 +1,71 @@ +import os + +from aws_cdk import (aws_autoscaling as asg, aws_ec2 as ec2, core as cdk) +from aws_cdk.core import Tags + +# These are corp only access prefixes per region (you 
can access resources only within amazon corp network) +# More info: https://apll.corp.amazon.com/ +REGION_PREFIX_MAP = { + "ap-northeast-1": "pl-bea742d7", + "ap-northeast-2": "pl-8fa742e6", + "ap-northeast-3": "pl-42a6432b", + "ap-south-1": "pl-f0a04599", + "ap-southeast-1": "pl-60a74209", + "ap-southeast-2": "pl-04a7426d", + "ca-central-1": "pl-85a742ec", + "eu-central-1": "pl-19a74270", + "eu-north-1": "pl-c2aa4fab", + "eu-west-1": "pl-01a74268", + "eu-west-2": "pl-fca24795", + "eu-west-3": "pl-7dac4914", + "sa-east-1": "pl-a6a742cf", + "us-east-1": "pl-60b85b09", + "us-east-2": "pl-3ea44157", + "us-west-1": "pl-a4a742cd", + "us-west-2": "pl-f8a64391" +} +region = os.environ.get("CDK_DEPLOY_REGION", os.environ["CDK_DEFAULT_REGION"]) +prefixList = ec2.Peer.prefix_list(REGION_PREFIX_MAP[region]) + + +class Bastions(cdk.Stack): + + def __init__(self, scope: cdk.Construct, construct_id: str, vpc, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + stack_prefix = self.node.try_get_context("stack_prefix") + keypair = self.node.try_get_context("keypair") + capacity = 3 # One in each AZ + + if keypair is None or keypair == '': + raise ValueError("Please provide the EC2 keypair") + + # t3.micro CPU:2 Mem:2GiB $0.0104 + instance_type = ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3, ec2.InstanceSize.MICRO) + ami_id = ec2.MachineImage.latest_amazon_linux(generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, + cpu_type=ec2.AmazonLinuxCpuType.X86_64) + + bastion_security_group = ec2.SecurityGroup(self, stack_prefix + "bastion-security-group", + vpc=vpc, + description="Security group for bastion hosts restricted to corp CIDR", + security_group_name=stack_prefix + "bastion-security-group", + allow_all_outbound=True, + ) + + bastion_security_group.add_ingress_rule(prefixList, ec2.Port.tcp(22), description="SSH access from restricted CIDR") + + # TODO: What does below all traffic ingress mean and is it required? Can this be more restricted? + bastion_security_group.add_ingress_rule(bastion_security_group, ec2.Port.all_traffic()) + + bastion_nodes = asg.AutoScalingGroup(self, "BastionASG", + instance_type=instance_type, + machine_image=ami_id, + vpc=vpc, security_group=bastion_security_group, + desired_capacity=capacity, + max_capacity=capacity, + min_capacity=capacity, + vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC), + key_name=keypair + ) + + Tags.of(bastion_nodes).add("role", region + "-" + "opensearch-bastion-hosts") diff --git a/cdk/opensearch-website-search/website_search_cdk/infra.py b/cdk/opensearch-website-search/website_search_cdk/infra.py new file mode 100644 index 0000000..f035280 --- /dev/null +++ b/cdk/opensearch-website-search/website_search_cdk/infra.py @@ -0,0 +1,253 @@ +import datetime +from enum import Enum + +from aws_cdk import (aws_autoscaling as asg, aws_ec2 as ec2, aws_elasticloadbalancingv2 as elb, aws_iam as iam, + aws_logs as logs, core as cdk) +from aws_cdk.core import Tags + + +# For consistency with other languages, `cdk` is the preferred import name for +# the CDK's core module. The following line also imports it as `core` for use +# with examples from the CDK Developer's Guide, which are in the process of +# being updated to use `cdk`. You may delete this import if you don't need it. 
+ +# Refer: https://github.com/aws/aws-cdk/blob/master/packages/%40aws-cdk/aws-ec2/lib/instance-types.ts +class InstanceClass(Enum): + m3 = ec2.InstanceClass.STANDARD3 + m4 = ec2.InstanceClass.STANDARD4 + m5 = ec2.InstanceClass.STANDARD5 + m6g = ec2.InstanceClass.STANDARD6_GRAVITON + + +class Architecture(Enum): + X64 = "x64" + ARM64 = "arm64" + + @classmethod + def has_value(cls, value): + return value in cls._value2member_map_ + + +class Security(Enum): + ENABLE = "enable" + DISABLE = "disable" + + @classmethod + def has_security_value(cls, value): + return value in cls._value2member_map_ + + +class ClusterStack(cdk.Stack): + def __init__(self, scope: cdk.Construct, construct_id: str, vpc, sg, architecture, security, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + # Context variables can be passed from command line using -c/--context flag. The values are stored in + # cdk.context.json file. If you do not pass any command line context key values, + # the defaults will be picked up from cdk.context.json file + distribution = self.node.try_get_context("distribution") + url = self.node.try_get_context("url") + dashboards_url = self.node.try_get_context("dashboards_url") + + # value checking for stack_prefix already done in app.py, omitting here + stack_prefix = self.node.try_get_context("stack_prefix") + + # TODO: add value checks for master_node_count, data_node_count and data_node_count + master_node_count = int(self.node.try_get_context("master_node_count")) + data_node_count = int(self.node.try_get_context("data_node_count")) + + keypair = self.node.try_get_context("keypair") + if keypair is None or keypair == '': + raise ValueError("Please provide the EC2 keypair") + if url is None or url == '': + raise ValueError("url cannot be null or empty") + if dashboards_url is None or dashboards_url == '': + raise ValueError(" dashboard_url cannot be null or empty") + if distribution is None or distribution == '' or distribution != "tar": + raise ValueError("Distribution cannot be null or empty. Please use tar ") + + nlb_opensearch_port = int(self.node.try_get_context("nlb_opensearch_port")) or 80 + nlb_dashboards_port = int(self.node.try_get_context("nlb_dashboards_port")) or 5601 + + # ami_id = self.node.try_get_context("ami_id") + # if ami_id is None or ami_id == '': + # raise ValueError("Please provide a valid ami-id. 
This should be a Amazon Linux 2 based AMI") + + ami_id = ec2.MachineImage.latest_amazon_linux(generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, + cpu_type=ec2.AmazonLinuxCpuType.X86_64) + + # Creating IAM role for read only access + read_secrets_policy = iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=["secretsmanager:GetResourcePolicy", + "secretsmanager:GetSecretValue", + "secretsmanager:DescribeSecret", + "secretsmanager:ListSecretVersionIds", + "secretsmanager:GetRandomPassword", + "secretsmanager:ListSecrets" + ], + resources=["arn:aws:secretsmanager:*:*:secret:*"], + ) + read_secrets_document = iam.PolicyDocument() + read_secrets_document.add_statements(read_secrets_policy) + + ec2_iam_role = iam.Role(self, "ec2_iam_role", + assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"), + managed_policies=[ + iam.ManagedPolicy.from_aws_managed_policy_name("AmazonEC2ReadOnlyAccess"), + iam.ManagedPolicy.from_aws_managed_policy_name("CloudWatchAgentServerPolicy") + ], + inline_policies={'ReadSecrets': read_secrets_document} + ) + + # def get_ec2_settings(arch): + # if arch == Architecture.X64.value: + # instance_type = ec2.InstanceType.of(InstanceClass.m5.value, ec2.InstanceSize.XLARGE) + # ami_id = ec2.MachineImage.latest_amazon_linux(generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, + # cpu_type=ec2.AmazonLinuxCpuType.X86_64) + # return instance_type, ami_id + # elif arch == Architecture.ARM64.value: + # instance_type = ec2.InstanceType.of(InstanceClass.m6g.value, ec2.InstanceSize.XLARGE) + # ami_id = ec2.MachineImage.latest_amazon_linux(generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, + # cpu_type=ec2.AmazonLinuxCpuType.ARM_64) + # return instance_type, ami_id + # else: + # raise ValueError("Unrecognised architecture") + + stack_name = cdk.Stack.of(self).stack_name + + # Logging + dt = datetime.datetime.utcnow() + dtformat = dt.strftime("%m-%d-%y-t%H-%M") + lg = logs.LogGroup(self, "LogGroup", + log_group_name=stack_name + '-' + dtformat, + retention=logs.RetentionDays.THREE_MONTHS) + + # Creating userdata for installation process + log_group_name = lg.log_group_name + userdata_map = { + "common": { + "__URL__": url, + "__STACK_NAME__": stack_name, + "__LG__": log_group_name, + "__SECURITY_PARAM__": security + }, + "master": { + "__NODE_NAME__": "master-node", + "__MASTER__": "true", + "__DATA__": "false", + "__INGEST__": "false" + }, + "seed": { + "__NODE_NAME__": "seed", + "__MASTER__": "true", + "__DATA__": "true", + "__INGEST__": "false" + }, + "data": { + "__NODE_NAME__": "data-node", + "__MASTER__": "false", + "__DATA__": "true", + "__INGEST__": "true" + }, + "client": { + "__NODE_NAME__": "client-node", + "__MASTER__": "false", + "__DATA__": "false", + "__INGEST__": "false" + }, + "dashboards": { + "__DASHBOARDS_URL__": dashboards_url, + "__SECURITY_PARAM__": security + } + } + userdata_map["master"].update(userdata_map["common"]) + userdata_map["client"].update(userdata_map["common"]) + userdata_map["seed"].update(userdata_map["common"]) + userdata_map["data"].update(userdata_map["common"]) + + with open(f"./userdata/{distribution}/main.sh") as f: + master_userdata = cdk.Fn.sub(f.read(), userdata_map["master"]) + with open(f"./userdata/{distribution}/main.sh") as f: + seed_userdata = cdk.Fn.sub(f.read(), userdata_map["seed"]) + with open(f"./userdata/{distribution}/main.sh") as f: + data_userdata = cdk.Fn.sub(f.read(), userdata_map["data"]) + with open(f"./userdata/{distribution}/main.sh") as f: + client_userdata = cdk.Fn.sub(f.read(), userdata_map["client"]) + 
with open(f"./userdata/{distribution}/dashboards.sh") as f: + dashboards_userdata = cdk.Fn.sub(f.read(), userdata_map["dashboards"]) + + # # ec2_instance_type, ami_id = get_ec2_settings(architecture) + # ami_id = ec2.MachineImage.latest_amazon_linux(generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, + # cpu_type=ec2.AmazonLinuxCpuType.X86_64) + + # Launching autoscaling groups that will configure all nodes + master_nodes = asg.AutoScalingGroup(self, "MasterASG", + instance_type=ec2.InstanceType.of(InstanceClass.m5.value, + ec2.InstanceSize.XLARGE), + machine_image=ami_id, + vpc=vpc, security_group=sg, + desired_capacity=master_node_count, + max_capacity=master_node_count, + min_capacity=master_node_count, + vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE), + key_name=keypair, + # TODO: do we still need to have keypair since this will be in private subnet + role=ec2_iam_role, + user_data=ec2.UserData.custom(master_userdata)) + Tags.of(master_nodes).add("role", "master") + + # TODO: Can the seed ASG be eliminated? + seed_node = asg.AutoScalingGroup(self, "SeedASG", + instance_type=ec2.InstanceType.of(InstanceClass.m5.value, ec2.InstanceSize.XLARGE), + machine_image=ami_id, + vpc=vpc, security_group=sg, + desired_capacity=1, + max_capacity=1, + min_capacity=1, + vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE), + key_name=keypair, + # TODO: do we still need to have keypair since this will be in private subnet + role=ec2_iam_role, + user_data=ec2.UserData.custom(seed_userdata)) + Tags.of(seed_node).add("role", "master") + + # Data nodes should be equally spread across 3 AZ's to resist AZ outages + data_nodes = asg.AutoScalingGroup(self, "DataASG", + instance_type=ec2.InstanceType.of(InstanceClass.m5.value, + ec2.InstanceSize.XLARGE), + machine_image=ami_id, + vpc=vpc, security_group=sg, + desired_capacity=data_node_count, + max_capacity=data_node_count, + min_capacity=data_node_count, + vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE), + key_name=keypair, + # TODO: do we still need to have keypair since this will be in private subnet + role=ec2_iam_role, + user_data=ec2.UserData.custom(data_userdata + dashboards_userdata)) + Tags.of(data_nodes).add("role", "data") + + # creating an private network load balancer to have a single endpoint + # TODO: enable logging, requires configuring a S3 bucket + self.nlb = elb.NetworkLoadBalancer(self, stack_prefix + "NetworkLoadBalancer", + vpc=vpc, + internet_facing=False, + vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE), + deletion_protection=True) + + self.opensearch_listener = self.nlb.add_listener("opensearch", port=nlb_opensearch_port, + protocol=elb.Protocol.TCP) + self.dashboards_listener = self.nlb.add_listener("dashboards", port=nlb_dashboards_port, + protocol=elb.Protocol.TCP) + + # Default Port mapping + # 80 : 9200 OpenSearch + # 5601 : 5601 OpenSearch-Dashboards + self.opensearch_listener.add_targets("OpenSearchTarget", + port=9200, + targets=[data_nodes]) + self.dashboards_listener.add_targets("DashboardsTarget", + port=5601, + targets=[data_nodes]) + cdk.CfnOutput(self, "Load Balancer Endpoint", + value=self.nlb.load_balancer_dns_name) diff --git a/cdk/opensearch-website-search/website_search_cdk/monitoring.py b/cdk/opensearch-website-search/website_search_cdk/monitoring.py new file mode 100644 index 0000000..7359632 --- /dev/null +++ b/cdk/opensearch-website-search/website_search_cdk/monitoring.py @@ -0,0 +1,60 @@ +from aws_cdk import (aws_ec2 as ec2, aws_events 
as events, aws_events_targets as targets, aws_iam as iam, + aws_lambda as aws_lambda, core as cdk) +import os + +class MonitoringStack(cdk.Stack): + + def __init__(self, scope: cdk.Construct, construct_id: str, vpc, nlb, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + monitoring_user = self.node.try_get_context("monitoring_user") + monitoring_pass = self.node.try_get_context("monitoring_pass") + + custom_cw_metrics_policy = iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=["cloudwatch:PutMetricData"], + resources=["*"], + ) + custom_cw_metrics_document = iam.PolicyDocument() + custom_cw_metrics_document.add_statements(custom_cw_metrics_policy) + + monitoring_role = iam.Role(self, "monitoring_role", + assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"), + managed_policies=[ + iam.ManagedPolicy.from_aws_managed_policy_name("service-role" + "/AWSLambdaBasicExecutionRole"), + iam.ManagedPolicy.from_aws_managed_policy_name("service-role" + "/AWSLambdaVPCAccessExecutionRole"), + ], + inline_policies={'PutCustomCWMetrics': custom_cw_metrics_document} + ) + + # TODO: Configure Lambda provisioned concurrency, requires setting alias, version etc + monitoring_lambda = aws_lambda.Function(self, 'MonitoringLambda', + handler='monitor.handler', + runtime=aws_lambda.Runtime.PYTHON_3_9, + code=aws_lambda.Code.asset('lambdas/monitoring-lambda'), + vpc=vpc, + vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE), + role=monitoring_role, + environment={ + 'MONITORING_USER': os.getenv("MONITORING_USER", monitoring_user), + 'MONITORING_PASS': os.getenv("MONITORING_PASS", monitoring_pass), + 'NLB_ENDPOINT': nlb.load_balancer_dns_name, + 'NLB_OPENSEARCH_PORT': self.node.try_get_context( + "nlb_opensearch_port") or 80, + 'NLB_DASHBOARD_PORT': self.node.try_get_context( + "nlb_dashboards_port") or 5601, + }) + # Run every minute + # See https://docs.aws.amazon.com/lambda/latest/dg/tutorial-scheduled-events-schedule-expressions.html + rule = events.Rule( + self, "OpenSearchMonitoringRule", + schedule=events.Schedule.cron( + minute='*', + hour='*', + month='*', + week_day='*', + year='*'), + ) + rule.add_target(targets.LambdaFunction(monitoring_lambda)) diff --git a/cdk/opensearch-website-search/website_search_cdk/network.py b/cdk/opensearch-website-search/website_search_cdk/network.py new file mode 100644 index 0000000..697e168 --- /dev/null +++ b/cdk/opensearch-website-search/website_search_cdk/network.py @@ -0,0 +1,79 @@ +import os + +from aws_cdk import (aws_ec2 as ec2, core as cdk) + +# For consistency with other languages, `cdk` is the preferred import name for +# the CDK's core module. The following line also imports it as `core` for use +# with examples from the CDK Developer's Guide, which are in the process of +# being updated to use `cdk`. You may delete this import if you don't need it. 
+ +# These are corp only access prefixes per region (you can access resources only within amazon corp network) +# More info: https://apll.corp.amazon.com/ +REGION_PREFIX_MAP = { + "ap-northeast-1": "pl-bea742d7", + "ap-northeast-2": "pl-8fa742e6", + "ap-northeast-3": "pl-42a6432b", + "ap-south-1": "pl-f0a04599", + "ap-southeast-1": "pl-60a74209", + "ap-southeast-2": "pl-04a7426d", + "ca-central-1": "pl-85a742ec", + "eu-central-1": "pl-19a74270", + "eu-north-1": "pl-c2aa4fab", + "eu-west-1": "pl-01a74268", + "eu-west-2": "pl-fca24795", + "eu-west-3": "pl-7dac4914", + "sa-east-1": "pl-a6a742cf", + "us-east-1": "pl-60b85b09", + "us-east-2": "pl-3ea44157", + "us-west-1": "pl-a4a742cd", + "us-west-2": "pl-f8a64391" +} +region = os.environ.get("CDK_DEPLOY_REGION", os.environ["CDK_DEFAULT_REGION"]) +prefixList = ec2.Peer.prefix_list(REGION_PREFIX_MAP[region]) + + +class Network(cdk.Stack): + + def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + cidr = self.node.try_get_context("cidr") + if cidr is None or cidr == '': + raise ValueError("CIDR cannot be null or empty") + + # value checking for stack_prefix already done in app.py, omitting here + stack_prefix = self.node.try_get_context("stack_prefix") + + public_subnet = ec2.SubnetConfiguration(name="public", subnet_type=ec2.SubnetType.PUBLIC, cidr_mask=24) + private_subnet = ec2.SubnetConfiguration(name="private", subnet_type=ec2.SubnetType.PRIVATE, cidr_mask=24) + + self.vpc = ec2.Vpc(self, stack_prefix + "cdk-vpc", + cidr=cidr, + max_azs=3, + nat_gateways=1, + subnet_configuration=[private_subnet, public_subnet] + ) + + vpc_flow_logs = ec2.FlowLog(self, stack_prefix + "vpc-flow-logs", + resource_type=ec2.FlowLogResourceType.from_vpc(self.vpc)) + + self.security_group = ec2.SecurityGroup(self, stack_prefix + "cdk-security-group", + vpc=self.vpc, + description="Opensearch website search domain security group", + security_group_name=stack_prefix + "SecurityGroup", + allow_all_outbound=True, + ) + + self.security_group.add_ingress_rule(prefixList, ec2.Port.tcp(22), description="SSH access to the nodes") + self.security_group.add_ingress_rule(ec2.Peer.ipv4(self.vpc.vpc_cidr_block), ec2.Port.tcp(9300), + description="Transport port for cluster formation") + self.security_group.add_ingress_rule(ec2.Peer.ipv4(self.vpc.vpc_cidr_block), ec2.Port.tcp(9200), + description="OpenSearch runs on port 9200 from VPC") + self.security_group.add_ingress_rule(ec2.Peer.ipv4(self.vpc.vpc_cidr_block), ec2.Port.tcp(9600), + description="Performance Analyzer plugin port from VPC") + self.security_group.add_ingress_rule(ec2.Peer.ipv4(self.vpc.vpc_cidr_block), ec2.Port.tcp(5601), + description="Used for accessing OpenSearch Dashboards from VPC") + self.security_group.add_ingress_rule(ec2.Peer.ipv4(self.vpc.vpc_cidr_block), ec2.Port.tcp(22), + description="SSH into private subnet using public subnet nodes") + # TODO: What does below all traffic ingress mean and is it required? + self.security_group.add_ingress_rule(self.security_group, ec2.Port.all_traffic())
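
Once deployed, the API Gateway from the APIGatewayLambda stack exposes `GET /search` on the `prod` stage (plus the `/opensearch/{proxy+}` and `/opensearch-dashboards/{proxy+}` proxies). A minimal end-to-end check of the search path might look like the sketch below; the invoke URL is a placeholder to be replaced with the real one from the API Gateway console or stack output.

```
# Sketch: query the deployed /search endpoint. The invoke URL below is a
# placeholder -- substitute the real one for your stage.
import requests

API_BASE = "https://<api-id>.execute-api.<region>.amazonaws.com/prod"

resp = requests.get(API_BASE + "/search", params={"q": "install opensearch", "v": "1.0"})
resp.raise_for_status()
for hit in resp.json().get("results", []):
    print(hit["version"], hit["title"], hit["url"])
```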