Skip to content

Commit

Permalink
[Heartbeat] improve monitor state loader failure attempts (#39621)
Browse files Browse the repository at this point in the history
* [Heartbeat] improve state loader failure logs

* try increasing timeouts

* exit when there is no error

* add state loader id
  • Loading branch information
vigneshshanmugam authored May 24, 2024
1 parent 9b35123 commit d5bfebb
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]

*Heartbeat*

- Fix monitor state loader so that it no longer sleeps after its final failed attempt {pull}39621[39621]

*Metricbeat*

- Setting period for counter cache for Prometheus remote_write at least to 60sec {pull}38553[38553]
Expand Down
16 changes: 9 additions & 7 deletions heartbeat/monitors/wrappers/monitorstate/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields, rc RetryConfig)
var loadedState *State
var err error
var i int
for i = 0; i < attempts; i++ {
for i = 1; i <= attempts; i++ {
loadedState, err = t.stateLoader(sf)
if err == nil {
if loadedState != nil {
Expand All @@ -111,7 +111,13 @@ func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields, rc RetryConfig)
}
var loaderError LoaderError
if errors.As(err, &loaderError) && !loaderError.Retry {
logp.L().Warnf("could not load last externally recorded state: %v", loaderError)
logp.L().Warnf("failed to load previous monitor state: %v", loaderError)
break
}

// last attempt, exit and log error without sleeping
if i == attempts {
logp.L().Warnf("failed to load previous monitor state: %s after %d attempts: %v", sf.ID, i, err)
break
}

Expand All @@ -120,17 +126,13 @@ func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields, rc RetryConfig)
if rc.waitFn != nil {
sleepFor = rc.waitFn()
}
logp.L().Warnf("could not load last externally recorded state, will retry again in %d milliseconds: %v", sleepFor.Milliseconds(), err)
logp.L().Warnf("could not load previous monitor state, retrying in %d milliseconds: %v", sleepFor.Milliseconds(), err)
time.Sleep(sleepFor)
}
if err != nil {
logp.L().Warnf("could not load prior state from elasticsearch after %d attempts, will create new state for monitor: %s", i+1, sf.ID)
}

if loadedState != nil {
t.states[sf.ID] = loadedState
}

// Return what we found, even if nil
return loadedState
}
Expand Down

0 comments on commit d5bfebb

Please sign in to comment.