diff --git a/lib/datadog/core/remote/client.rb b/lib/datadog/core/remote/client.rb index 911011515fa..8c5c2767245 100644 --- a/lib/datadog/core/remote/client.rb +++ b/lib/datadog/core/remote/client.rb @@ -10,6 +10,7 @@ module Core module Remote # Client communicates with the agent and sync remote configuration class Client + class TransportError < StandardError; end class SyncError < StandardError; end attr_reader :transport, :repository, :id, :dispatcher @@ -107,6 +108,8 @@ def sync else dispatcher.dispatch(changes, repository) end + elsif response.internal_error? + raise TransportError, response.to_s end end # rubocop:enable Metrics/AbcSize,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/CyclomaticComplexity diff --git a/lib/datadog/core/remote/component.rb b/lib/datadog/core/remote/component.rb index 34838c8d75f..aea2ed828ca 100644 --- a/lib/datadog/core/remote/component.rb +++ b/lib/datadog/core/remote/component.rb @@ -45,6 +45,11 @@ def initialize(settings, capabilities, agent_settings) "remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync" end rescue StandardError => e + # In case of unexpected errors, reset the negotiation object + # given external conditions have changed and the negotiation + # negotiation object stores error logging state that should be reset. + negotiation = Negotiation.new(settings, agent_settings) + Datadog.logger.error do "remote worker error: #{e.class.name} #{e.message} location: #{Array(e.backtrace).first}. "\ 'reseting client state' diff --git a/lib/datadog/core/remote/negotiation.rb b/lib/datadog/core/remote/negotiation.rb index f9307dfe041..823d330f5f6 100644 --- a/lib/datadog/core/remote/negotiation.rb +++ b/lib/datadog/core/remote/negotiation.rb @@ -12,31 +12,44 @@ def initialize(_settings, agent_settings) transport_options[:agent_settings] = agent_settings if agent_settings @transport_root = Datadog::Core::Transport::HTTP.root(**transport_options.dup) + @logged = {} end def endpoint?(path) res = @transport_root.send_info if res.internal_error? && network_error?(res.error) - Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" } + unless @logged[:agent_unreachable] + Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" } + @logged[:agent_unreachable] = true + end return false end if res.not_found? - Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" } + unless @logged[:no_info_endpoint] + Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" } + @logged[:no_info_endpoint] = true + end return false end unless res.ok? - Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" } + unless @logged[:unexpected_response] + Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" } + @logged[:unexpected_response] = true + end return false end unless res.endpoints.include?(path) - Datadog.logger.error { "agent reachable but does not report #{path}" } + unless @logged[:no_config_endpoint] + Datadog.logger.error { "agent reachable but does not report #{path}" } + @logged[:no_config_endpoint] = true + end return false end diff --git a/lib/datadog/core/transport/http/client.rb b/lib/datadog/core/transport/http/client.rb index 09437936060..89d1325daa5 100644 --- a/lib/datadog/core/transport/http/client.rb +++ b/lib/datadog/core/transport/http/client.rb @@ -30,7 +30,7 @@ def send_request(request, &block) "Internal error during #{self.class.name} request. Cause: #{e.class.name} #{e.message} " \ "Location: #{Array(e.backtrace).first}" - Datadog.logger.error(message) + Datadog.logger.debug(message) Datadog::Transport::InternalErrorResponse.new(e) end diff --git a/sig/datadog/core/remote/client.rbs b/sig/datadog/core/remote/client.rbs index 1bcdf76d7bf..0521f3bb25b 100644 --- a/sig/datadog/core/remote/client.rbs +++ b/sig/datadog/core/remote/client.rbs @@ -2,6 +2,9 @@ module Datadog module Core module Remote class Client + class TransportError < StandardError + end + class SyncError < StandardError end diff --git a/sig/datadog/core/remote/negotiation.rbs b/sig/datadog/core/remote/negotiation.rbs index d892890f040..126cfc398de 100644 --- a/sig/datadog/core/remote/negotiation.rbs +++ b/sig/datadog/core/remote/negotiation.rbs @@ -3,6 +3,7 @@ module Datadog module Remote class Negotiation @transport_root: Datadog::Core::Transport::Negotiation::Transport + @logged: ::Hash[::Symbol, bool] def initialize: (Datadog::Core::Configuration::Settings _settings, Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings agent_settings) -> void diff --git a/spec/datadog/core/remote/client_spec.rb b/spec/datadog/core/remote/client_spec.rb index 830f66d911e..1db669f53a9 100644 --- a/spec/datadog/core/remote/client_spec.rb +++ b/spec/datadog/core/remote/client_spec.rb @@ -12,7 +12,6 @@ allow(http_request).to receive(:body=) allow(request_class).to receive(:new).and_return(http_request) - http_connection = instance_double(::Net::HTTP) allow(::Net::HTTP).to receive(:new).and_return(http_connection) allow(http_connection).to receive(:open_timeout=) @@ -37,6 +36,7 @@ end end + let(:http_connection) { instance_double(::Net::HTTP) } let(:transport) { Datadog::Core::Transport::HTTP.v7(&proc { |_client| }) } let(:roots) do [ @@ -458,6 +458,14 @@ end end end + + context 'with a network error' do + it 'raises a transport error' do + expect(http_connection).to receive(:request).and_raise(IOError) + + expect { client.sync }.to raise_error(Datadog::Core::Remote::Client::TransportError) + end + end end describe '#payload' do diff --git a/spec/datadog/core/remote/component_spec.rb b/spec/datadog/core/remote/component_spec.rb index d0ff1dbc98f..ce99f0a7f63 100644 --- a/spec/datadog/core/remote/component_spec.rb +++ b/spec/datadog/core/remote/component_spec.rb @@ -61,7 +61,7 @@ before do expect(Datadog::Core::Transport::HTTP).to receive(:v7).and_return(transport_v7) expect(Datadog::Core::Remote::Client).to receive(:new).and_return(client) - expect(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation) + allow(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation) expect(worker).to receive(:start).and_call_original expect(worker).to receive(:stop).and_call_original @@ -118,6 +118,14 @@ expect(component.client.object_id).to eql(second_client.object_id) end + + it 'resets the negotiation object' do + allow(Datadog::Core::Remote::Client).to receive(:new).and_return(second_client) + + component.barrier(:once) + + expect(Datadog::Core::Remote::Negotiation).to have_received(:new).twice + end end context 'Client::SyncError' do diff --git a/spec/datadog/core/remote/negotiation_spec.rb b/spec/datadog/core/remote/negotiation_spec.rb index 556bdc2b252..ed1c503e38a 100644 --- a/spec/datadog/core/remote/negotiation_spec.rb +++ b/spec/datadog/core/remote/negotiation_spec.rb @@ -34,7 +34,8 @@ describe '#endpoint?' do include_context 'HTTP connection stub' - subject(:negotiation) { described_class.new(settings, agent_settings) } + subject(:endpoint?) { negotiation.endpoint?('/foo') } + let(:negotiation) { described_class.new(settings, agent_settings) } context 'when /info exists' do let(:response_code) { 200 } @@ -50,7 +51,7 @@ it do expect(Datadog.logger).to_not receive(:error) - expect(negotiation.endpoint?('/foo')).to be true + expect(endpoint?).to be true end it do @@ -68,7 +69,14 @@ expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent rejects request' do @@ -79,7 +87,14 @@ expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent is in error' do @@ -90,7 +105,14 @@ expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent causes an error' do @@ -100,20 +122,34 @@ end before do - expect(Datadog.logger).to receive(:error).twice.and_return(nil) + expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent is unreachable' do let(:request_exception) { Errno::ECONNREFUSED.new } before do - expect(Datadog.logger).to receive(:error).twice.and_return(nil) + expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end end end diff --git a/spec/datadog/core/transport/http_spec.rb b/spec/datadog/core/transport/http_spec.rb index e89cbd64037..f3b645fbf32 100644 --- a/spec/datadog/core/transport/http_spec.rb +++ b/spec/datadog/core/transport/http_spec.rb @@ -18,7 +18,6 @@ allow(http_request).to receive(:body=) allow(request_class).to receive(:new).and_return(http_request) - http_connection = instance_double(::Net::HTTP) allow(::Net::HTTP).to receive(:new).and_return(http_connection) allow(http_connection).to receive(:open_timeout=) @@ -32,6 +31,8 @@ end end + let(:http_connection) { instance_double(::Net::HTTP) } + describe '.root' do subject(:transport) { described_class.root(&client_options) } @@ -199,6 +200,16 @@ it { is_expected.to have_attributes(:roots => be_a(Array)) } it { is_expected.to have_attributes(:targets => be_a(Hash)) } it { is_expected.to have_attributes(:target_files => be_a(Array)) } + + context 'with a network error' do + it 'raises a transport error' do + expect(http_connection).to receive(:request).and_raise(IOError) + + expect(Datadog.logger).to receive(:debug).with(/IOError/) + + expect(response).to have_attributes(internal_error?: true) + end + end end end end