From b30b2f365a3e17f5194eb4fd6ce426d0726206e5 Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Fri, 28 Apr 2023 14:23:34 -0700 Subject: [PATCH] v11.4.0; support for pre-registering partial transactions for AWS Lambda (#199) Refs: https://github.com/elastic/apm-agent-nodejs/issues/3136 --- CHANGELOG.md | 11 ++++ README.md | 69 ++++++++++++++++++------ index.js | 110 ++++++++++++++++++++++++++++++++++++-- package.json | 2 +- test/lambda-usage.test.js | 28 +++++++++- 5 files changed, 198 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac059f2..8c8bf20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # elastic-apm-http-client changelog +## v11.4.0 + +- Add support for pre-registering of partial transactions for AWS Lambda. + This adds `client.lambdaShouldRegisterTransactions()` and + `client.lambdaRegisterTransaction(transaction, awsRequestId)` so the + APM agent can register a partial transaction with the Elastic Lambda + extension before executing the user's handler. In some error cases + (`uncaughtException`, `unhandledRejection`, Lambda timeout), the extension + can report that transaction when the APM agent is unable. + (https://github.com/elastic/apm-agent-nodejs/issues/3136) + ## v11.3.1 - Tweak logic to only exclude `metadata.service.agent.activation_method` when diff --git a/README.md b/README.md index ede80fa..d08b0d3 100644 --- a/README.md +++ b/README.md @@ -363,29 +363,29 @@ Client creation. ### `client.lambdaStart()` Tells the client that a Lambda function invocation has started. +See [Notes on Lambda Usage](#notes-on-lambda-usage) below. -#### Notes on Lambda usage +### `client.lambdaShouldRegisterTransactions()` -To properly handle [data flushing for instrumented Lambda functions](https://github.com/elastic/apm/blob/main/specs/agents/tracing-instrumentation-aws-lambda.md#data-flushing) -this Client should be used as follows in a Lambda environment. +This returns a boolean indicating if the APM agent -- when running in a Lambda +environment -- should bother calling `client.lambdaRegisterTransaction(...)`. +This can help the APM agent avoid some processing gathering transaction data. -- When a Lambda invocation starts, `client.lambdaStart()` must be called. +Typically the reason this would return `false` is when the Lambda extension is +too old to support registering transactions. - The Client prevents intake requests to APM Server when in a Lambda environment - when a function invocation is *not* active. This is to ensure that an intake - request does not accidentally span a period when a Lambda VM is frozen, - which can lead to timeouts and lost APM data. -- When a Lambda invocation finishes, `client.flush({lambdaEnd: true}, cb)` must - be called. +### `client.lambdaRegisterTransaction(transaction, awsRequestId)` - The `lambdaEnd: true` tells the Client to (a) mark the lambda as inactive so - a subsequent intake request is not started until the next invocation, and - (b) signal the Elastic AWS Lambda Extension that this invocation is done. - The user's Lambda handler should not finish until `cb` is called. This - ensures that the extension receives tracing data and the end signal before - the Lambda Runtime freezes the VM. +Tells the Lambda Extension about the ongoing transaction, so that data can be +used to report the transaction in certain error cases -- e.g. a Lambda handler +timeout. See [Notes on Lambda Usage](#notes-on-lambda-usage) below. + +Arguments: +- `transaction` - A transaction object that can be serialized to JSON. +- `awsRequestId` - The AWS request ID for this invocation. This is a UUID + available on the Lambda context object. ### `client.sendSpan(span[, callback])` @@ -462,6 +462,43 @@ Destroy the `client`. After this call, the client has ended and subsequent calls to `sendSpan()`, `sendTransaction()`, `sendError()`, `flush()`, or `end()` will result in an error. +## Notes on Lambda usage + +To properly handle [data flushing for instrumented Lambda functions](https://github.com/elastic/apm/blob/main/specs/agents/tracing-instrumentation-aws-lambda.md#data-flushing) +this Client should be used as follows in a Lambda environment. + + 1. Ensure that metadata is set before any of the following calls. Typically + in Lambda this is done by (a) configuring the client with + `expectExtraMetadata` and (b) calling `setExtraMetadata()` at the start of + the first invocation. + + 2. When a Lambda invocation starts, `client.lambdaStart()` must be called. + The Client prevents intake requests to APM Server when in a Lambda + environment when a function invocation is *not* active. This is to ensure + that an intake request does not accidentally span a period when a Lambda VM + is frozen, which can lead to timeouts and lost APM data. + + 3. When the transaction for this Lambda invocation has been created, + `await client.lambdaRegisterTransaction(, )` should be + called. This is used to pass transaction details to the Lambda Extension so + a transaction can be reported in certain failure modes (e.g. a Lambda + handler timeout). + + `client.lambdaShouldRegisterTransactions()` can be used to avoid gathering + data for this call. + + 4. When a Lambda invocation finishes, `client.flush({lambdaEnd: true}, cb)` + must be called. + + The `lambdaEnd: true` tells the Client to (a) mark the lambda as inactive so + a subsequent intake request is not started until the next invocation, and + (b) signal the Elastic AWS Lambda Extension that this invocation is done. + The user's Lambda handler should not finish until `cb` is called. This + ensures that the extension receives tracing data and the end signal before + the Lambda Runtime freezes the VM. + + + ## License [MIT](LICENSE) diff --git a/index.js b/index.js index 621a135..6bf997d 100644 --- a/index.js +++ b/index.js @@ -111,6 +111,9 @@ function Client (opts) { // _lambdaActive indicates if a Lambda function invocation is active. It is // only meaningful if `isLambdaExecutionEnvironment`. this._lambdaActive = false + // Whether to forward `.lambdaRegisterTransaction()` calls to the Lambda + // extension. This will be set false if a previous attempt failed. + this._lambdaShouldRegisterTransactions = true // Internal runtime stats for developer debugging/tuning. this._numEvents = 0 // number of events given to the client @@ -312,6 +315,7 @@ Client.prototype.config = function (opts) { this._conf.requestIntake = getIntakeRequestOptions(this._conf, this._agent) this._conf.requestConfig = getConfigRequestOptions(this._conf, this._agent) this._conf.requestSignalLambdaEnd = getSignalLambdaEndRequestOptions(this._conf, this._agent) + this._conf.requestRegisterTransaction = getRegisterTransactionRequestOptions(this._conf, this._agent) // fixes bug where cached/memoized _encodedMetadata wouldn't be // updated when client was reconfigured @@ -649,6 +653,101 @@ Client.prototype.lambdaStart = function () { this._lambdaActive = true } +/** + * Indicate whether the APM agent -- when in a Lambda environment -- should + * bother calling `.lambdaRegisterTransaction(...)`. + * + * @returns {boolean} + */ +Client.prototype.lambdaShouldRegisterTransactions = function () { + return this._lambdaShouldRegisterTransactions +} + +/** + * Tell the local Lambda extension about the just-started transaction. This + * allows the extension to report the transaction in certain error cases + * where the APM agent isn't able to *end* the transaction and report it, + * e.g. if the function is about to timeout, or if the process crashes. + * + * The expected request is as follows, and a 200 status code is expected in + * response: + * + * POST /register/transaction + * Content-Type: application/vnd.elastic.apm.transaction+ndjson + * x-elastic-aws-request-id: ${awsRequestId} + * + * {"metadata":{...}} + * {"transaction":{...partial transaction data...}} + * + * @param {object} trans - a mostly complete APM Transaction object. It should + * have a default `outcome` value. `duration` and `result` (and possibly + * `outcome`) fields will be set by the Elastic Lambda extension if this + * transaction is used. + * @param {import('crypto').UUID} awsRequestId + * @returns {Promise || undefined} So this can, and should, be `await`ed. + * If returning a promise, it will only resolve, never reject. + */ +Client.prototype.lambdaRegisterTransaction = function (trans, awsRequestId) { + if (!isLambdaExecutionEnvironment) { + return + } + if (!this._lambdaShouldRegisterTransactions) { + return + } + assert(this._encodedMetadata, '_encodedMetadata is set') + + // We expect to be talking to the localhost Elastic Lambda extension, so we + // want a shorter timeout than `_conf.serverTimeout`. + const TIMEOUT_MS = 5000 + const startTime = performance.now() + + return new Promise((resolve, reject) => { + this._log.trace({ awsRequestId, traceId: trans.trace_id, transId: trans.id }, 'lambdaRegisterTransaction start') + var out = this._encode({ transaction: trans }, Client.encoding.TRANSACTION) + + const finish = errOrErrMsg => { + const durationMs = performance.now() - startTime + if (errOrErrMsg) { + this._log.debug({ awsRequestId, err: errOrErrMsg, durationMs }, 'lambdaRegisterTransaction unsuccessful') + this._lambdaShouldRegisterTransactions = false + } else { + this._log.trace({ awsRequestId, durationMs }, 'lambdaRegisterTransaction success') + } + resolve() // always resolve, never reject + } + + // Every `POST /register/transaction` request must set the + // `x-elastic-aws-request-id` header. Instead of creating a new options obj + // each time, we just modify in-place. + this._conf.requestRegisterTransaction.headers['x-elastic-aws-request-id'] = awsRequestId + + const req = this._transportRequest(this._conf.requestRegisterTransaction, res => { + res.on('error', err => { + // Not sure this event can ever be emitted, but just in case. + res.destroy(err) + }) + res.resume() + if (res.statusCode !== 200) { + finish(`unexpected response status code: ${res.statusCode}`) + return + } + res.on('end', function () { + finish() + }) + }) + req.setTimeout(TIMEOUT_MS) + req.on('timeout', () => { + req.destroy(new Error(`timeout (${TIMEOUT_MS}ms) registering lambda transaction`)) + }) + req.on('error', err => { + finish(err) + }) + req.write(this._encodedMetadata) + req.write(out) + req.end() + }) +} + // With the cork/uncork handling on this stream, `this.write`ing on this // stream when already destroyed will lead to: // Error: Cannot call write after a stream was destroyed @@ -1160,9 +1259,9 @@ Client.prototype._signalLambdaEnd = function (cb) { const finish = errOrErrMsg => { const durationMs = performance.now() - startTime if (errOrErrMsg) { - this._log.error({ err: errOrErrMsg, durationMs }, 'error signaling lambda invocation done') + this._log.error({ err: errOrErrMsg, durationMs }, '_signalLambdaEnd error') } else { - this._log.trace({ durationMs }, 'signaled lambda invocation done') + this._log.trace({ durationMs }, '_signalLambdaEnd success') } cb() } @@ -1187,7 +1286,6 @@ Client.prototype._signalLambdaEnd = function (cb) { }) req.setTimeout(TIMEOUT_MS) req.on('timeout', () => { - this._log.trace('_signalLambdaEnd timeout') req.destroy(new Error(`timeout (${TIMEOUT_MS}ms) signaling Lambda invocation done`)) }) req.on('error', err => { @@ -1321,6 +1419,12 @@ function getSignalLambdaEndRequestOptions (opts, agent) { return getBasicRequestOptions('POST', '/intake/v2/events?flushed=true', headers, opts, agent) } +function getRegisterTransactionRequestOptions (opts, agent) { + const headers = getHeaders(opts) + headers['Content-Type'] = 'application/vnd.elastic.apm.transaction+ndjson' + return getBasicRequestOptions('POST', '/register/transaction', headers, opts, agent) +} + function getConfigRequestOptions (opts, agent) { const path = '/config/v1/agents?' + querystring.stringify({ 'service.name': opts.serviceName, diff --git a/package.json b/package.json index 4899245..079bc33 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "elastic-apm-http-client", - "version": "11.3.1", + "version": "11.4.0", "description": "A low-level HTTP client for communicating with the Elastic APM intake API", "main": "index.js", "directories": { diff --git a/test/lambda-usage.test.js b/test/lambda-usage.test.js index e76c8ef..a1d8033 100644 --- a/test/lambda-usage.test.js +++ b/test/lambda-usage.test.js @@ -23,6 +23,22 @@ test('lambda usage', suite => { test('setup mock APM server', t => { server = APMServer(function (req, res) { + if (req.method === 'POST' && req.url === '/register/transaction') { + req.resume() + req.on('end', () => { + res.writeHead(200) + res.end() + }) + return + } else if (!(req.method === 'POST' && req.url.startsWith('/intake/v2/events'))) { + req.resume() + req.on('end', () => { + res.writeHead(404) + res.end() + }) + return + } + // Capture intake req data to this mock APM server to `reqsToServer`. const reqInfo = { method: req.method, @@ -78,13 +94,21 @@ test('lambda usage', suite => { }, client._conf.bufferWindowTime + 10) }) - test('lambda invocation', t => { + test('lambda invocation', async (t) => { client.lambdaStart() // 1. start of invocation + + // 2. Registering transaction + t.equal(client.lambdaShouldRegisterTransactions(), true, '.lambdaShouldRegisterTransactions() is true') + await client.lambdaRegisterTransaction( + { name: 'GET /aStage/myFn', type: 'lambda', outcome: 'unknown' /* ... */ }, + '063de0d2-1705-4eeb-9dfd-045d76b8cdec') + t.equal(client.lambdaShouldRegisterTransactions(), true, '.lambdaShouldRegisterTransactions() is true after register') + setTimeout(() => { client.sendTransaction({ name: 'GET /aStage/myFn', type: 'lambda', result: 'success' /* ... */ }) client.sendSpan({ name: 'mySpan', type: 'custom', result: 'success' /* ... */ }) - // 2. end of invocation + // 3. Flush at end of invocation client.flush({ lambdaEnd: true }, function () { t.ok(reqsToServer.length > 1, 'at least 2 intake requests to APM Server') t.equal(reqsToServer[reqsToServer.length - 1].url.searchParams.get('flushed'), 'true',