Skip to content

Commit

Permalink
Skip "file already closed" errors in generator storage instance test (#…
Browse files Browse the repository at this point in the history
…3882)

* Ignore ErrClosed in generator storage instance test

Occasionally we see "file already closed" errors in our CI.  There
appears to be a race in the syscall to close the file.

* Stop the test ticker

* Check the contents of the error and not the chain

* Add a note to the registry commit

* Add registry note

* Fix nested assertions

* Cover additional appender.Commit() tests

* Check the context.Err right before commit

* Avoid error checking and only cancel early

* Update comment
  • Loading branch information
zalegrala authored Jul 24, 2024
1 parent c27bf8b commit d22ff77
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
7 changes: 7 additions & 0 deletions modules/generator/registry/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,13 @@ func (r *ManagedRegistry) collectMetrics(ctx context.Context) {
maxActiveSeries := r.overrides.MetricsGeneratorMaxActiveSeries(r.tenant)
r.metricMaxActiveSeries.Set(float64(maxActiveSeries))

// Try to avoid committing after we have started the shutdown process.
if ctx.Err() != nil { // shutdown
return
}

// If the shutdown has started here, a "file already closed" error will be
// observed here.
err = appender.Commit()
if err != nil {
return
Expand Down
7 changes: 6 additions & 1 deletion modules/generator/storage/instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ func TestInstance(t *testing.T) {
mockServer.refuseRequests.Store(false)

// Shutdown the instance - even though previous requests failed, remote.Storage should flush pending data
cancel()
err = instance.Close()
assert.NoError(t, err)

Expand Down Expand Up @@ -156,6 +157,7 @@ func TestInstance_multiTenancy(t *testing.T) {
})
require.NoError(t, err, "timed out while waiting for accepted requests")

cancel()
for _, instance := range instances {
// Shutdown the instance - remote write should flush pending data
err = instance.Close()
Expand Down Expand Up @@ -231,7 +233,8 @@ func TestInstance_remoteWriteHeaders(t *testing.T) {
return
}

assert.NoError(t, appender.Commit())
err = appender.Commit()
assert.NoError(t, err)
})

// Wait until remote.Storage has tried at least once to send data
Expand All @@ -247,6 +250,7 @@ func TestInstance_remoteWriteHeaders(t *testing.T) {
mockServer.refuseRequests.Store(false)

// Shutdown the instance - even though previous requests failed, remote.Storage should flush pending data
cancel()
err = instance.Close()
assert.NoError(t, err)

Expand Down Expand Up @@ -332,6 +336,7 @@ func (m *mockPrometheusRemoteWriteServer) close() {
// poll executes f every interval until ctx is done or cancelled.
func poll(ctx context.Context, interval time.Duration, f func()) {
ticker := time.NewTicker(interval)
defer ticker.Stop()

for {
select {
Expand Down

0 comments on commit d22ff77

Please sign in to comment.