Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Panic in stacktrace mapping #326

Closed
lahsivjar opened this issue Jul 22, 2024 · 1 comment · Fixed by #327
Closed

Panic in stacktrace mapping #326

lahsivjar opened this issue Jul 22, 2024 · 1 comment · Fixed by #327
Assignees
Labels
bug Something isn't working

Comments

@lahsivjar
Copy link
Contributor

{"log.level":"error","@timestamp":"2024-07-22T15:24:51.682+0100","log.logger":"request","log.origin":{"function":"github.com/elastic/apm-server/internal/beater/api.apmMiddleware.LogMiddleware.func1.1","file.name":"middleware/log_middleware.go","file.line":59},"message":"internal error","service.name":"apm-server","url.original":"/intake/v2/events","http.request.method":"POST","user_agent.original":"Go-http-client/1.1","source.address":"127.0.0.1","http.request.id":"a390e2cf-1bbc-4485-9251-4f082c5b2cbe","event.duration":449042,"http.request.body.bytes":4096,"http.response.status_code":500,"error.message":"internal error: runtime error: invalid memory address or nil pointer dereference","error.stack_trace":"goroutine 180 [running]:
	runtime/debug.Stack()
		runtime/debug/stack.go:24 +0x64
	github.com/elastic/apm-server/internal/beater/api.apmMiddleware.RecoverPanicMiddleware.func2.1.1()
		github.com/elastic/apm-server/internal/beater/middleware/recover_panic_middleware.go:55 +0x134
	panic({0x106a3dd20?, 0x108025640?})
		runtime/panic.go:770 +0x124
	github.com/elastic/apm-data/input/elasticapm/internal/modeldecoder/v2.mapToStracktraceModel({0x1400f88a008?, 0x1a, 0x140028c2120?}, {0x14018f18f00, 0x1a, 0x140198f5ada?})
		github.com/elastic/[email protected]/input/elasticapm/internal/modeldecoder/v2/decoder.go:1320 +0x80
	github.com/elastic/apm-data/input/elasticapm/internal/modeldecoder/v2.mapToSpanModel(0x140115ec608, 0x1401860dce0)
		github.com/elastic/[email protected]/input/elasticapm/internal/modeldecoder/v2/decoder.go:1282 +0x13f0
	github.com/elastic/apm-data/input/elasticapm/internal/modeldecoder/v2.DecodeNestedSpan({0x106d5b2e0, 0x14018c351e0}, 0x140000bceb8, 0x14018c62c18)
		github.com/elastic/[email protected]/input/elasticapm/internal/modeldecoder/v2/decoder.go:218 +0x118
	github.com/elastic/apm-data/input/elasticapm.(*Processor).readBatch(0x1400062ea50, {0x1082a85b8?, 0x50?}, 0x14018f80580, 0x14018c114a0?, 0x14018c62c18, 0x14018c351e0, 0x14018bff380)
		github.com/elastic/[email protected]/input/elasticapm/processor.go:206 +0x1d8
	github.com/elastic/apm-data/input/elasticapm.(*Processor).handleStream(0x1400062ea50, {0x106d69418, 0x14018c0b470}, 0x14018c62c18, 0x0?, 0x0?, 0x0?, {0x106d54668, 0x140004001c8}, 0x14018bff380)
		github.com/elastic/[email protected]/input/elasticapm/processor.go:310 +0x4c
	github.com/elastic/apm-data/input/elasticapm.(*Processor).HandleStream(0x1400062ea50, {0x106d69418?, 0x14018c0b470?}, 0x14018f80580, {0x10f2ef308, 0x14018c11400}, 0xa, {0x106d54668, 0x140004001c8}, 0x14018bff380)
		github.com/elastic/[email protected]/input/elasticapm/processor.go:291 +0x480
	github.com/elastic/apm-server/internal/beater/api.(*routeBuilder).backendIntakeHandler.Handler.func2(0x14014b97c80)
		github.com/elastic/apm-server/internal/beater/api/intake/handler.go:77 +0x158
	github.com/elastic/apm-server/internal/beater/api.backendMiddleware.AnonymousRateLimitMiddleware.func3.1(0x14018bf6d80?)
		github.com/elastic/apm-server/internal/beater/middleware/rate_limit_middleware.go:49 +0xc4
	github.com/elastic/apm-server/internal/beater/api.backendMiddleware.AuthMiddleware.func2.1(0x14014b97c80)
		github.com/elastic/apm-server/internal/beater/middleware/auth_middleware.go:70 +0x1ac
	github.com/elastic/apm-server/internal/beater/api.backendMiddleware.ResponseHeadersMiddleware.func1.1(0x14014b97c80)
		github.com/elastic/apm-server/internal/beater/middleware/response_headers_middleware.go:33 +0xa4
	github.com/elastic/apm-server/internal/beater/middleware.MonitoringMiddleware.(*monitoringMiddleware).Middleware.func1.1(0x14014b97c80)
		github.com/elastic/apm-server/internal/beater/middleware/monitoring_middleware.go:53 +0xac
	github.com/elastic/apm-server/internal/beater/api.apmMiddleware.RecoverPanicMiddleware.func2.1(0x106923ce0?)
		github.com/elastic/apm-server/internal/beater/middleware/recover_panic_middleware.go:60 +0x50
	github.com/elastic/apm-server/internal/beater/middleware.TimeoutMiddleware.func1.1(0x14014b97c80)
		github.com/elastic/apm-server/internal/beater/middleware/timeout_middleware.go:35 +0x3c
	github.com/elastic/apm-server/internal/beater/api.apmMiddleware.LogMiddleware.func1.1(0x14014b97c80)
		github.com/elastic/apm-server/internal/beater/middleware/log_middleware.go:47 +0xa0
	github.com/elastic/apm-server/internal/beater/api.NewMux.(*ContextPool).HTTPHandler.func11({0x106d633d0, 0x14013e417a0}, 0x14018bf6d80)
		github.com/elastic/apm-server/internal/beater/request/context_pool.go:47 +0xe0
	net/http.HandlerFunc.ServeHTTP(0x14018bf6d80?, {0x106d633d0?, 0x14013e417a0?}, 0x14018bf6d80?)
		net/http/server.go:2166 +0x38
	go.elastic.co/apm/module/apmhttp/v2.(*handler).ServeHTTP(0x14018bf6c60?, {0x106d633d0?, 0x14013e417a0?}, 0x1082a85b8?)
		go.elastic.co/apm/module/apmhttp/[email protected]/handler.go:74 +0xac
	github.com/gorilla/mux.(*Router).ServeHTTP(0x140005ce240, {0x106d633d0, 0x14013e417a0}, 0x14018bf6b40)
		github.com/gorilla/[email protected]/mux.go:212 +0x194
	github.com/elastic/gmux.ConfigureServer.(*mux).withGRPCInsecure.func3({0x106d633d0?, 0x14013e417a0?}, 0x0?)
		github.com/elastic/[email protected]/mux.go:119 +0x120
	net/http.HandlerFunc.ServeHTTP(0x10?, {0x106d633d0?, 0x14013e417a0?}, 0x14013e417a0?)
		net/http/server.go:2166 +0x38
	net/http.serverHandler.ServeHTTP({0x106d5ec28?}, {0x106d633d0?, 0x14013e417a0?}, 0x6?)
		net/http/server.go:3137 +0xbc
	net/http.(*conn).serve(0x14002bf2090, {0x106d69418, 0x1400028f3e0})
		net/http/server.go:2039 +0x508
	created by net/http.(*Server).Serve in goroutine 169
		net/http/server.go:3285 +0x3f0
	","ecs.version":"1.6.0"}
@lahsivjar lahsivjar self-assigned this Jul 22, 2024
@simitt simitt added the bug Something isn't working label Jul 23, 2024
@lahsivjar
Copy link
Contributor Author

lahsivjar commented Jul 23, 2024

The root cause of this is that the Reslice function (ref) in APM data falsely assumes that the whole capacity of the slice is non-nil. The below test case can be used to reproduce the panic:

func TestBadReslice(t *testing.T) {
	metadata := `{"metadata": {"service": {"name": "1234_service-12a3","node": {"configured_name": "node-123"},"version": "5.1.3","environment": "staging","language": {"name": "ecmascript","version": "8"},"runtime": {"name": "node","version": "8.0.0"},"framework": {"name": "Express","version": "1.2.3"},"agent": {"name": "elastic-node","version": "3.14.0"}},"user": {"id": "123user", "username": "bar", "email": "[email protected]"}, "labels": {"tag0": null, "tag1": "one", "tag2": 2}, "process": {"pid": 1234,"ppid": 6789,"title": "node","argv": ["node","server.js"]},"system": {"hostname": "prod1.example.com","architecture": "x64","platform": "darwin", "container": {"id": "container-id"}, "kubernetes": {"namespace": "namespace1", "pod": {"uid": "pod-uid", "name": "pod-name"}, "node": {"name": "node-name"}}},"cloud":{"account":{"id":"account_id","name":"account_name"},"availability_zone":"cloud_availability_zone","instance":{"id":"instance_id","name":"instance_name"},"machine":{"type":"machine_type"},"project":{"id":"project_id","name":"project_name"},"provider":"cloud_provider","region":"cloud_region","service":{"name":"lambda"}}}}`
	errstacktrace := `{"error": {"id": "cdefab0123456789", "trace_id": null, "timestamp": 1533826745999000,"exception": {"message": "Cannot read property 'baz' no defined", "stacktrace": [{"classname": "cl1", "abs_path": "abs_path1"}, {"classname": "cl2", "abs_path": "abs_path2"}, {"classname": "cl3", "abs_path": "abs_path3"}, {"classname": "cl4", "abs_path": "abs_path4"}, {"classname": "cl5", "abs_path": "abs_path5"}]}}}`

	batchProcessor := modelpb.ProcessBatchFunc(func(ctx context.Context, batch *modelpb.Batch) error {
		return nil
	})
	p := NewProcessor(Config{
		MaxEventSize: 100 * 1024,
		Semaphore:    semaphore.NewWeighted(1),
	})

	// Below, we try to emulate a case where all of the capacity of Error#Exception#Stacktrace slice
	// is not populated with non-nil values. In practical cases, this could happen with OTLP handling
	// of exceptions: https://github.com/elastic/apm-data/blob/cee5ac3fc5f2ee66f156133992882cae758e2b66/input/otlp/exceptions.go#L165
	event1 := modelpb.APMEventFromVTPool()
	event1.Error = modelpb.ErrorFromVTPool()
	event1.Error.Exception = modelpb.ExceptionFromVTPool()
	for i := 0; i < 3; i++ {
		// Add 3 so that reallocation makes the capacity 4
		// this will cause the 4th element to be nil
		event1.Error.Exception.Stacktrace = append(event1.Error.Exception.Stacktrace, modelpb.StacktraceFrameFromVTPool())
	}
	require.Equal(t, 4, cap(event1.Error.Exception.Stacktrace))

	event1.ReturnToVTPool()
	event2 := modelpb.APMEventFromVTPool()
	payload2 := strings.Join([]string{metadata, errstacktrace}, "\n")
	err := p.HandleStream(
		context.Background(), event2,
		strings.NewReader(payload2), 10, batchProcessor,
		&Result{},
	)
	require.NoError(t, err)
}

I am working on a fix.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants