From d04441189a091022d9cc0bd62fdbbabbd82d513a Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Fri, 15 Nov 2024 22:10:50 +0000 Subject: [PATCH] Modify lambda processor and sink documentation (#8697) (#8771) --- .../configuration/processors/aws-lambda.md | 78 ++----------------- .../configuration/sinks/aws-lambda.md | 73 +++++++++++++++++ 2 files changed, 79 insertions(+), 72 deletions(-) create mode 100644 _data-prepper/pipelines/configuration/sinks/aws-lambda.md diff --git a/_data-prepper/pipelines/configuration/processors/aws-lambda.md b/_data-prepper/pipelines/configuration/processors/aws-lambda.md index 832f1c1a27..bd167996a1 100644 --- a/_data-prepper/pipelines/configuration/processors/aws-lambda.md +++ b/_data-prepper/pipelines/configuration/processors/aws-lambda.md @@ -1,18 +1,18 @@ --- layout: default -title: AWS Lambda integration for Data Prepper +title: aws_lambda parent: Processors grand_parent: Pipelines nav_order: 10 --- -# AWS Lambda integration for Data Prepper +# aws_lambda integration for Data Prepper -The AWS Lambda integration allows developers to use serverless computing capabilities within their Data Prepper pipelines for flexible event processing and data routing. +The [AWS Lambda](https://aws.amazon.com/lambda/) integration allows developers to use serverless computing capabilities within their Data Prepper pipelines for flexible event processing and data routing. ## AWS Lambda processor configuration -The `aws_lambda processor` enables invocation of an AWS Lambda function within your Data Prepper pipeline to process events. It supports both synchronous and asynchronous invocations based on your use case. +The `aws_lambda` processor enables invocation of an AWS Lambda function within your Data Prepper pipeline in order to process events. It supports both synchronous and asynchronous invocations based on your use case. 
## Configuration fields @@ -61,8 +61,8 @@ The processor supports the following invocation types: - `request-response`: The processor waits for Lambda function completion before proceeding. - `event`: The function is triggered asynchronously without waiting for a response. -- `Batching`: When enabled, events are aggregated and sent in bulk to optimize Lambda invocations. Batch thresholds control the event count, size limit, and timeout. -- `Codec`: JSON is used for both request and response codecs. Lambda must return JSON array outputs. +- `batch`: When enabled, events are aggregated and sent in bulk to optimize Lambda invocations. Batch thresholds control the event count, size limit, and timeout. +- `codec`: JSON is used for both request and response codecs. Lambda must return JSON array outputs. - `tags_on_match_failure`: Custom tags can be applied to events when Lambda processing fails or encounters unexpected issues. ## Behavior @@ -90,71 +90,5 @@ Integration tests for this plugin are executed separately from the main Data Pre ``` ./gradlew :data-prepper-plugins:aws-lambda:integrationTest -Dtests.processor.lambda.region="us-east-1" -Dtests.processor.lambda.functionName="lambda_test_function" -Dtests.processor.lambda.sts_role_arn="arn:aws:iam::123456789012:role/dataprepper-role ``` -{% include copy-curl.html %} - -## AWS Lambda sink - -You can configure the sink using the following configuration options. - -Field | Type | Required | Description ------------------ | ------- | -------- | ---------------------------------------------------------------------------- -`function_name` | String | Required | The name of the AWS Lambda function to invoke. -`invocation_type` | String | Optional | Specifies the invocation type. Default is `event`. -`aws.region` | String | Required | The AWS Region in which the Lambda function is located. -`aws.sts_role_arn`| String | Optional | The ARN of the role to assume before invoking the Lambda function. 
-`max_retries` | Integer | Optional | The maximum number of retries for failed invocations. Default is `3`. -`batch` | Object | Optional | The batch settings for the Lambda invocations. Default is `key_name = "events"`. Default threshold is `event_count=100`, `maximum_size="5mb"`, and `event_collect_timeout = 10s`. -`lambda_when` | String | Optional | A conditional expression that determines when to invoke the Lambda processor. -`dlq` | Object | Optional | A dead-letter queue (DLQ) configuration for failed invocations. - -#### Example configuration - -``` -sink: - - aws_lambda: - function_name: "my-lambda-sink" - invocation_type: "event" - aws: - region: "us-west-2" - sts_role_arn: "arn:aws:iam::123456789012:role/my-lambda-sink-role" - max_retries: 5 - batch: - key_name: "events" - threshold: - event_count: 50 - maximum_size: "3mb" - event_collect_timeout: PT5S - lambda_when: "event['type'] == 'log'" - dlq: - region: "us-east-1" - sts_role_arn: "arn:aws:iam::123456789012:role/my-sqs-role" - bucket: "<>" -``` -{% include copy-curl.html %} -## Usage - -The sink supports the following invocation types: - -- `event`: The function is triggered asynchronously without waiting for a response. -- `request-response`: Not supported for sink operations. -- `Batching`: When enabled, events are aggregated and sent in bulk to optimize Lambda invocations. Default is `enabled`. -- `DLQ`: A setup available for routing and processing events that persistently fail Lambda invocations after multiple retry attempts. - -## Advanced configurations - -The AWS Lambda processor and sink provide the following advanced options for security and performance optimization: - -- AWS Identity and Access Management (IAM) role assumption: The processor and sink support assuming the specified IAM role `aws.sts_role_arn` before Lambda invocation. This enhances secure handling by providing access control to AWS resources. 
-- Concurrency management: When using the `event` invocation type, consider Lambda concurrency limits to avoid throttling. - -For more information about AWS Lambda integration with Data Prepper, see the [AWS Lambda documentation](https://docs.aws.amazon.com/lambda). - -## Integration testing - -Integration tests for this plugin are executed separately from the main Data Prepper build process. Use the following Gradle command to run these tests: - -``` -./gradlew :data-prepper-plugins:aws-lambda:integrationTest -Dtests.sink.lambda.region="us-east-1" -Dtests.sink.lambda.functionName="lambda_test_function" -Dtests.sink.lambda.sts_role_arn="arn:aws:iam::123456789012:role/dataprepper-role -``` {% include copy-curl.html %} diff --git a/_data-prepper/pipelines/configuration/sinks/aws-lambda.md b/_data-prepper/pipelines/configuration/sinks/aws-lambda.md new file mode 100644 index 0000000000..d8c00bdb16 --- /dev/null +++ b/_data-prepper/pipelines/configuration/sinks/aws-lambda.md @@ -0,0 +1,73 @@ +--- +layout: default +title: aws_lambda +parent: Sinks +grand_parent: Pipelines +nav_order: 10 +--- + +---------------------------------------------------------------------------------------- +# `aws_lambda` sink for Data Prepper + +This page explains how to configure and use [AWS Lambda](https://aws.amazon.com/lambda/) with Data Prepper, enabling Lambda functions to serve as both processors and sinks. + +## `aws_lambda` sink + +Configure the Lambda sink using the following parameters. + +Field | Type | Required | Description +--------------------| ------- | -------- | ---------------------------------------------------------------------------- +`function_name` | String | Yes | The name of the AWS Lambda function to invoke. +`invocation_type` | String | No | Specifies the invocation type. Default is `event`. +`aws.region` | String | Yes | The AWS Region in which the Lambda function is located. 
+`aws.sts_role_arn` | String | No | The Amazon Resource Name (ARN) of the role to assume before invoking the Lambda function. +`max_retries` | Integer | No | The maximum number of retries if the invocation fails. Default is `3`. +`batch` | Object | No | Optional batch settings for Lambda invocations. Default is `key_name = events`. Default threshold is `event_count=100`, `maximum_size="5mb"`, and `event_collect_timeout = 10s`. +`lambda_when` | String | No | A conditional expression that determines when to invoke the Lambda sink. +`dlq` | Object | No | The dead-letter queue (DLQ) configuration for failed invocations. + +#### Example configuration + +``` +sink: + - aws_lambda: + function_name: "my-lambda-sink" + invocation_type: "event" + aws: + region: "us-west-2" + sts_role_arn: "arn:aws:iam::123456789012:role/my-lambda-sink-role" + max_retries: 5 + batch: + key_name: "events" + threshold: + event_count: 50 + maximum_size: "3mb" + event_collect_timeout: PT5S + lambda_when: "event['type'] == 'log'" + dlq: + region: "us-east-1" + sts_role_arn: "arn:aws:iam::123456789012:role/my-sqs-role" + bucket: "<<your-dlq-bucket-name>>" +``` +{% include copy-curl.html %} + +## Usage + +The sink supports the following invocation types and options: + +- `event` (Default): Executes functions asynchronously without waiting for responses. +- `request-response` (Sink only): Executes functions synchronously, though responses are not processed. +- `batch`: Automatically groups events based on configured thresholds. +- `dlq`: Supports the DLQ configuration for failed invocations after retry attempts. + +Data Prepper components use an AWS Identity and Access Management (IAM) role assumption, `aws.sts_role_arn`, for secure Lambda function invocation and respect Lambda's concurrency limits during event processing. For more information, see the [AWS Lambda documentation](https://docs.aws.amazon.com/lambda). +{: .note} + +## Developer guide + +Integration tests must be executed separately from the main Data Prepper build.
Execute them with the following command: + +``` +./gradlew :data-prepper-plugins:aws-lambda:integrationTest -Dtests.sink.lambda.region="us-east-1" -Dtests.sink.lambda.functionName="lambda_test_function" -Dtests.sink.lambda.sts_role_arn="arn:aws:iam::123456789012:role/dataprepper-role" +``` +{% include copy-curl.html %}