Skip to content

Commit

Permalink
Merge pull request #3011 from DataDog/reduce-rc-logging
Browse files Browse the repository at this point in the history
Reduce remote configuration error logging
  • Loading branch information
marcotc authored Jul 28, 2023
2 parents a197a82 + 88ce4d1 commit eed3de7
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 17 deletions.
3 changes: 3 additions & 0 deletions lib/datadog/core/remote/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module Core
module Remote
# Client communicates with the agent and sync remote configuration
class Client
class TransportError < StandardError; end
class SyncError < StandardError; end

attr_reader :transport, :repository, :id, :dispatcher
Expand Down Expand Up @@ -107,6 +108,8 @@ def sync
else
dispatcher.dispatch(changes, repository)
end
elsif response.internal_error?
raise TransportError, response.to_s
end
end
# rubocop:enable Metrics/AbcSize,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/CyclomaticComplexity
Expand Down
5 changes: 5 additions & 0 deletions lib/datadog/core/remote/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ def initialize(settings, capabilities, agent_settings)
"remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync"
end
rescue StandardError => e
# In case of unexpected errors, reset the negotiation object
# given external conditions have changed and the negotiation
# negotiation object stores error logging state that should be reset.
negotiation = Negotiation.new(settings, agent_settings)

Datadog.logger.error do
"remote worker error: #{e.class.name} #{e.message} location: #{Array(e.backtrace).first}. "\
'reseting client state'
Expand Down
21 changes: 17 additions & 4 deletions lib/datadog/core/remote/negotiation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,44 @@ def initialize(_settings, agent_settings)
transport_options[:agent_settings] = agent_settings if agent_settings

@transport_root = Datadog::Core::Transport::HTTP.root(**transport_options.dup)
@logged = {}
end

def endpoint?(path)
res = @transport_root.send_info

if res.internal_error? && network_error?(res.error)
Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" }
unless @logged[:agent_unreachable]
Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" }
@logged[:agent_unreachable] = true
end

return false
end

if res.not_found?
Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
unless @logged[:no_info_endpoint]
Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
@logged[:no_info_endpoint] = true
end

return false
end

unless res.ok?
Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" }
unless @logged[:unexpected_response]
Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" }
@logged[:unexpected_response] = true
end

return false
end

unless res.endpoints.include?(path)
Datadog.logger.error { "agent reachable but does not report #{path}" }
unless @logged[:no_config_endpoint]
Datadog.logger.error { "agent reachable but does not report #{path}" }
@logged[:no_config_endpoint] = true
end

return false
end
Expand Down
2 changes: 1 addition & 1 deletion lib/datadog/core/transport/http/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def send_request(request, &block)
"Internal error during #{self.class.name} request. Cause: #{e.class.name} #{e.message} " \
"Location: #{Array(e.backtrace).first}"

Datadog.logger.error(message)
Datadog.logger.debug(message)

Datadog::Transport::InternalErrorResponse.new(e)
end
Expand Down
3 changes: 3 additions & 0 deletions sig/datadog/core/remote/client.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ module Datadog
module Core
module Remote
class Client
class TransportError < StandardError
end

class SyncError < StandardError
end

Expand Down
1 change: 1 addition & 0 deletions sig/datadog/core/remote/negotiation.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module Datadog
module Remote
class Negotiation
@transport_root: Datadog::Core::Transport::Negotiation::Transport
@logged: ::Hash[::Symbol, bool]

def initialize: (Datadog::Core::Configuration::Settings _settings, Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings agent_settings) -> void

Expand Down
10 changes: 9 additions & 1 deletion spec/datadog/core/remote/client_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
allow(http_request).to receive(:body=)
allow(request_class).to receive(:new).and_return(http_request)

http_connection = instance_double(::Net::HTTP)
allow(::Net::HTTP).to receive(:new).and_return(http_connection)

allow(http_connection).to receive(:open_timeout=)
Expand All @@ -37,6 +36,7 @@
end
end

let(:http_connection) { instance_double(::Net::HTTP) }
let(:transport) { Datadog::Core::Transport::HTTP.v7(&proc { |_client| }) }
let(:roots) do
[
Expand Down Expand Up @@ -458,6 +458,14 @@
end
end
end

context 'with a network error' do
it 'raises a transport error' do
expect(http_connection).to receive(:request).and_raise(IOError)

expect { client.sync }.to raise_error(Datadog::Core::Remote::Client::TransportError)
end
end
end

describe '#payload' do
Expand Down
10 changes: 9 additions & 1 deletion spec/datadog/core/remote/component_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
before do
expect(Datadog::Core::Transport::HTTP).to receive(:v7).and_return(transport_v7)
expect(Datadog::Core::Remote::Client).to receive(:new).and_return(client)
expect(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation)
allow(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation)

expect(worker).to receive(:start).and_call_original
expect(worker).to receive(:stop).and_call_original
Expand Down Expand Up @@ -118,6 +118,14 @@

expect(component.client.object_id).to eql(second_client.object_id)
end

it 'resets the negotiation object' do
allow(Datadog::Core::Remote::Client).to receive(:new).and_return(second_client)

component.barrier(:once)

expect(Datadog::Core::Remote::Negotiation).to have_received(:new).twice
end
end

context 'Client::SyncError' do
Expand Down
54 changes: 45 additions & 9 deletions spec/datadog/core/remote/negotiation_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
describe '#endpoint?' do
include_context 'HTTP connection stub'

subject(:negotiation) { described_class.new(settings, agent_settings) }
subject(:endpoint?) { negotiation.endpoint?('/foo') }
let(:negotiation) { described_class.new(settings, agent_settings) }

context 'when /info exists' do
let(:response_code) { 200 }
Expand All @@ -50,7 +51,7 @@
it do
expect(Datadog.logger).to_not receive(:error)

expect(negotiation.endpoint?('/foo')).to be true
expect(endpoint?).to be true
end

it do
Expand All @@ -68,7 +69,14 @@
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent rejects request' do
Expand All @@ -79,7 +87,14 @@
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent is in error' do
Expand All @@ -90,7 +105,14 @@
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent causes an error' do
Expand All @@ -100,20 +122,34 @@
end

before do
expect(Datadog.logger).to receive(:error).twice.and_return(nil)
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent is unreachable' do
let(:request_exception) { Errno::ECONNREFUSED.new }

before do
expect(Datadog.logger).to receive(:error).twice.and_return(nil)
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end
end
end
13 changes: 12 additions & 1 deletion spec/datadog/core/transport/http_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
allow(http_request).to receive(:body=)
allow(request_class).to receive(:new).and_return(http_request)

http_connection = instance_double(::Net::HTTP)
allow(::Net::HTTP).to receive(:new).and_return(http_connection)

allow(http_connection).to receive(:open_timeout=)
Expand All @@ -32,6 +31,8 @@
end
end

let(:http_connection) { instance_double(::Net::HTTP) }

describe '.root' do
subject(:transport) { described_class.root(&client_options) }

Expand Down Expand Up @@ -199,6 +200,16 @@
it { is_expected.to have_attributes(:roots => be_a(Array)) }
it { is_expected.to have_attributes(:targets => be_a(Hash)) }
it { is_expected.to have_attributes(:target_files => be_a(Array)) }

context 'with a network error' do
it 'raises a transport error' do
expect(http_connection).to receive(:request).and_raise(IOError)

expect(Datadog.logger).to receive(:debug).with(/IOError/)

expect(response).to have_attributes(internal_error?: true)
end
end
end
end
end

0 comments on commit eed3de7

Please sign in to comment.