From 0f3d691a79eda430d72e5316540d010db324219c Mon Sep 17 00:00:00 2001 From: Edoardo Spadolini Date: Wed, 13 Nov 2024 18:46:14 +0100 Subject: [PATCH] Periodically check connectivity between peer proxies (#48838) * Add ping RPC to grpc proxy peering * Periodically check connectivity between peer proxies * Replace deprecated jitter functions --- api/client/proto/proxyservice.pb.go | 359 ++++++++++++++++-- .../legacy/client/proto/proxyservice.proto | 7 + lib/proxy/peer/client.go | 89 ++++- lib/proxy/peer/client_test.go | 24 +- lib/proxy/peer/helpers_test.go | 1 + lib/proxy/peer/internal/clientconn.go | 3 + lib/proxy/peer/internal/metrics.go | 109 ++++++ lib/proxy/peer/service.go | 5 + 8 files changed, 549 insertions(+), 48 deletions(-) create mode 100644 lib/proxy/peer/internal/metrics.go diff --git a/api/client/proto/proxyservice.pb.go b/api/client/proto/proxyservice.pb.go index 4b088e27d928d..d2695c0784251 100644 --- a/api/client/proto/proxyservice.pb.go +++ b/api/client/proto/proxyservice.pb.go @@ -353,12 +353,92 @@ func (m *ConnectionEstablished) XXX_DiscardUnknown() { var xxx_messageInfo_ConnectionEstablished proto.InternalMessageInfo +type ProxyServicePingRequest struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProxyServicePingRequest) Reset() { *m = ProxyServicePingRequest{} } +func (m *ProxyServicePingRequest) String() string { return proto.CompactTextString(m) } +func (*ProxyServicePingRequest) ProtoMessage() {} +func (*ProxyServicePingRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_b76fff22d4479739, []int{5} +} +func (m *ProxyServicePingRequest) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ProxyServicePingRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_ProxyServicePingRequest.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ProxyServicePingRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProxyServicePingRequest.Merge(m, src) +} +func (m *ProxyServicePingRequest) XXX_Size() int { + return m.Size() +} +func (m *ProxyServicePingRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ProxyServicePingRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ProxyServicePingRequest proto.InternalMessageInfo + +type ProxyServicePingResponse struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProxyServicePingResponse) Reset() { *m = ProxyServicePingResponse{} } +func (m *ProxyServicePingResponse) String() string { return proto.CompactTextString(m) } +func (*ProxyServicePingResponse) ProtoMessage() {} +func (*ProxyServicePingResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_b76fff22d4479739, []int{6} +} +func (m *ProxyServicePingResponse) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ProxyServicePingResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_ProxyServicePingResponse.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ProxyServicePingResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProxyServicePingResponse.Merge(m, src) +} +func (m *ProxyServicePingResponse) XXX_Size() int { + return m.Size() +} +func (m *ProxyServicePingResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ProxyServicePingResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ProxyServicePingResponse proto.InternalMessageInfo + func init() { proto.RegisterType((*Frame)(nil), "proto.Frame") proto.RegisterType((*DialRequest)(nil), "proto.DialRequest") proto.RegisterType((*NetAddr)(nil), "proto.NetAddr") proto.RegisterType((*Data)(nil), "proto.Data") proto.RegisterType((*ConnectionEstablished)(nil), "proto.ConnectionEstablished") + proto.RegisterType((*ProxyServicePingRequest)(nil), "proto.ProxyServicePingRequest") + proto.RegisterType((*ProxyServicePingResponse)(nil), "proto.ProxyServicePingResponse") } func init() { @@ -366,34 +446,36 @@ func init() { } var fileDescriptor_b76fff22d4479739 = []byte{ - // 422 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x52, 0x41, 0x6f, 0xd3, 0x30, - 0x14, 0xae, 0xa1, 0xed, 0xe8, 0x4b, 0xc5, 0xc1, 0x1a, 0x10, 0x4d, 0x53, 0x81, 0x1c, 0xd0, 0xc4, - 0x21, 0x99, 0x8a, 0x34, 0xa4, 0x9d, 0x20, 0x04, 0x54, 0x0e, 0x4c, 0xc8, 0xeb, 0x69, 0x37, 0x37, - 0x79, 0xca, 0x2c, 0xb2, 0x38, 0xd8, 0xce, 0x20, 0xbf, 0x8f, 0x0b, 0x47, 0xee, 0x48, 0x08, 0xf5, - 0x67, 0x70, 0x42, 0x71, 0x5c, 0x35, 0x95, 0x8a, 0xc4, 0x25, 0x7e, 0xcf, 0xef, 0x7b, 0x5f, 0xbe, - 0xf7, 0xf9, 0x41, 0x64, 0xb0, 0xc0, 0x4a, 0x2a, 0x13, 0x15, 0x98, 0xf3, 0xb4, 0x89, 0xd2, 0x42, - 0x60, 0x69, 0xa2, 0x4a, 0x49, 0x23, 0xdb, 0xef, 0xd7, 0x46, 0xa3, 0xba, 0x15, 0x29, 0x86, 0xf6, - 0x8a, 0x8e, 0xec, 0x71, 0x74, 0x98, 0xcb, 0x5c, 0x76, 0xa0, 0x36, 0xea, 0x8a, 0xc1, 0x37, 0x02, - 0xa3, 0x77, 0x8a, 0xdf, 0x20, 0x3d, 0x03, 0x2f, 0x11, 0xbc, 0x60, 0xf8, 0xb9, 0x46, 0x6d, 0x7c, - 0xf2, 0x84, 0x9c, 0x78, 0x73, 0xda, 0xc1, 0xc2, 0x5e, 0x65, 0x31, 0x60, 0x7d, 0x20, 0x5d, 0xc2, - 0x83, 0x37, 0xb2, 0x2c, 0x31, 0x35, 0x42, 0x96, 0x6f, 0xb5, 0xe1, 0xab, 0x42, 0xe8, 0x6b, 0xcc, - 0xfc, 0x3b, 0x96, 0xe1, 0xd8, 0x31, 0xec, 0xc5, 0x2c, 0x06, 0x6c, 0x7f, 0x33, 0x7d, 0x0a, 0xc3, - 0x84, 0x1b, 0xee, 0xdf, 0xb5, 0x24, 0xde, 0x46, 0x06, 0x37, 0x7c, 0x31, 0x60, 0xb6, 0x14, 0x4f, - 0xe0, 0xe0, 0x03, 0x6a, 0xcd, 0x73, 0x0c, 0x7e, 0x92, 0x1d, 0xf1, 0xf4, 0x21, 0x8c, 0x2f, 0x64, - 0x86, 0xef, 0x13, 0x3b, 0xc6, 0x84, 0xb9, 0x8c, 0x5e, 0x01, 0x2c, 0xeb, 0xb2, 0xc4, 0x62, 0xd9, - 0x54, 0x68, 0x05, 0x4e, 0xe2, 0xf3, 0x3f, 0xbf, 0x1e, 0x9f, 0xe5, 0xc2, 0x5c, 0xd7, 0xab, 0x30, - 0x95, 0x37, 0x51, 0xae, 0xf8, 0xad, 0x30, 0xbc, 0x15, 0xc4, 0x8b, 0xad, 0xd9, 0xbc, 0x12, 0x91, - 0x69, 0x2a, 0xd4, 0xe1, 0x96, 0x81, 0xf5, 0xd8, 0xe8, 0x33, 0x18, 0x5f, 0xca, 0x5a, 0xa5, 0xe8, - 0x34, 0xdf, 0x77, 0x9a, 0x2f, 0xd0, 0xbc, 0xce, 0x32, 0xc5, 0x5c, 0x95, 0x9e, 0x82, 0x97, 0xa0, - 0x36, 0xa2, 0xb4, 0xbf, 0xf0, 0x87, 0x7b, 0xc1, 0x7d, 0x48, 0xf0, 0x12, 0x0e, 0xdc, 0x3d, 0xf5, - 0x6d, 0xf8, 0x45, 0xaa, 0x4f, 0x6e, 0xb2, 0x4d, 0x4a, 0x29, 0x0c, 0x5b, 0x44, 0x37, 0x14, 0xb3, - 0x71, 0x70, 0xdc, 0x99, 0x48, 0x0f, 0x61, 0x14, 0x37, 0x06, 0xb5, 0xed, 0x99, 0xb2, 0x2e, 0x09, - 0x1e, 0xfd, 0xe3, 0xe1, 0xe6, 0xe7, 0x30, 0xfd, 0xd8, 0xae, 0xd1, 0x65, 0xb7, 0x46, 0xf4, 0x39, - 0xdc, 0x6b, 0xcd, 0x6d, 0x3d, 0xa4, 0x53, 0x27, 0xd4, 0xee, 0xcc, 0xd1, 0x4e, 0x76, 0x42, 0x4e, - 0x49, 0xfc, 0xea, 0xfb, 0x7a, 0x46, 0x7e, 0xac, 0x67, 0xe4, 0xf7, 0x7a, 0x46, 0xae, 0xe6, 0xff, - 0xe7, 0x6b, 0x7f, 0x83, 0x57, 0x63, 0x7b, 0xbc, 0xf8, 0x1b, 0x00, 0x00, 0xff, 0xff, 0xe0, 0x08, - 0x45, 0x85, 0xe8, 0x02, 0x00, 0x00, + // 460 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x53, 0xbf, 0x6f, 0xd3, 0x40, + 0x14, 0xce, 0x41, 0x92, 0x92, 0x97, 0x88, 0xe1, 0xa9, 0x50, 0x13, 0x55, 0x29, 0x78, 0x40, 0x15, + 0x43, 0x5c, 0x05, 0xa9, 0x48, 0x4c, 0x10, 0x02, 0x84, 0x81, 0xaa, 0xba, 0x66, 0xea, 0x76, 0x71, + 0x9e, 0xdc, 0x13, 0xee, 0x9d, 0xf1, 0x5d, 0x0a, 0x9e, 0xf9, 0xd3, 0x58, 0x18, 0xd9, 0x91, 0x10, + 0xca, 0x9f, 0xc1, 0x84, 0x7c, 0xbe, 0xaa, 0x8e, 0x48, 0xa4, 0x2e, 0xb9, 0xf7, 0xee, 0xfb, 0xee, + 0xcb, 0xf7, 0x7e, 0x18, 0x22, 0x4b, 0x29, 0x65, 0x3a, 0xb7, 0x51, 0x4a, 0x89, 0x88, 0x8b, 0x28, + 0x4e, 0x25, 0x29, 0x1b, 0x65, 0xb9, 0xb6, 0xba, 0xfc, 0xfd, 0x5a, 0x18, 0xca, 0xaf, 0x64, 0x4c, + 0x43, 0x77, 0x85, 0x2d, 0x77, 0xf4, 0x77, 0x13, 0x9d, 0xe8, 0x8a, 0x54, 0x46, 0x15, 0x18, 0x7e, + 0x67, 0xd0, 0x7a, 0x97, 0x8b, 0x4b, 0xc2, 0x63, 0xe8, 0x4e, 0xa4, 0x48, 0x39, 0x7d, 0x5e, 0x92, + 0xb1, 0x01, 0x7b, 0xcc, 0x0e, 0xbb, 0x23, 0xac, 0x68, 0xc3, 0x1a, 0x32, 0x6d, 0xf0, 0x3a, 0x11, + 0x67, 0xf0, 0xe0, 0x8d, 0x56, 0x8a, 0x62, 0x2b, 0xb5, 0x7a, 0x6b, 0xac, 0x98, 0xa7, 0xd2, 0x5c, + 0xd0, 0x22, 0xb8, 0xe3, 0x14, 0xf6, 0xbd, 0xc2, 0x46, 0xce, 0xb4, 0xc1, 0x37, 0x3f, 0xc6, 0x27, + 0xd0, 0x9c, 0x08, 0x2b, 0x82, 0xbb, 0x4e, 0xa4, 0x7b, 0x6d, 0x43, 0x58, 0x31, 0x6d, 0x70, 0x07, + 0x8d, 0x3b, 0xb0, 0xf3, 0x91, 0x8c, 0x11, 0x09, 0x85, 0xbf, 0xd8, 0x9a, 0x79, 0x7c, 0x08, 0xed, + 0x13, 0xbd, 0xa0, 0x0f, 0x13, 0x57, 0x46, 0x87, 0xfb, 0x0c, 0xcf, 0x01, 0x66, 0x4b, 0xa5, 0x28, + 0x9d, 0x15, 0x19, 0x39, 0x83, 0x9d, 0xf1, 0xcb, 0xbf, 0xbf, 0x0f, 0x8e, 0x13, 0x69, 0x2f, 0x96, + 0xf3, 0x61, 0xac, 0x2f, 0xa3, 0x24, 0x17, 0x57, 0xd2, 0x8a, 0xd2, 0x90, 0x48, 0x6f, 0x9a, 0x2d, + 0x32, 0x19, 0xd9, 0x22, 0x23, 0x33, 0xbc, 0x51, 0xe0, 0x35, 0x35, 0x7c, 0x0a, 0xed, 0x33, 0xbd, + 0xcc, 0x63, 0xf2, 0x9e, 0xef, 0x7b, 0xcf, 0x27, 0x64, 0x5f, 0x2f, 0x16, 0x39, 0xf7, 0x28, 0x1e, + 0x41, 0x77, 0x42, 0xc6, 0x4a, 0xe5, 0xfe, 0x22, 0x68, 0x6e, 0x24, 0xd7, 0x29, 0xe1, 0x0b, 0xd8, + 0xf1, 0xf7, 0x18, 0xb8, 0xf0, 0x8b, 0xce, 0x3f, 0xf9, 0xca, 0xae, 0x53, 0x44, 0x68, 0x96, 0x8c, + 0xaa, 0x28, 0xee, 0xe2, 0x70, 0xbf, 0x6a, 0x22, 0xee, 0x42, 0x6b, 0x5c, 0x58, 0x32, 0xee, 0x4d, + 0x8f, 0x57, 0x49, 0xb8, 0xb7, 0x65, 0x70, 0xe1, 0x23, 0xd8, 0x3b, 0x2d, 0xd7, 0xe8, 0xac, 0x5a, + 0xa3, 0x53, 0xa9, 0x12, 0xdf, 0xd8, 0xb0, 0x0f, 0xc1, 0xff, 0x90, 0xc9, 0xb4, 0x32, 0x34, 0xfa, + 0xc6, 0xa0, 0x57, 0x07, 0xf1, 0x19, 0xdc, 0x2b, 0x87, 0x52, 0xf6, 0x1e, 0x7b, 0xbe, 0x40, 0xb7, + 0x6b, 0xfd, 0xb5, 0xec, 0x90, 0x1d, 0x31, 0x7c, 0x0f, 0xcd, 0x52, 0x0c, 0x07, 0x1e, 0xd9, 0x62, + 0xa0, 0x7f, 0xb0, 0x15, 0xaf, 0x5c, 0x8c, 0x5f, 0xfd, 0x58, 0x0d, 0xd8, 0xcf, 0xd5, 0x80, 0xfd, + 0x59, 0x0d, 0xd8, 0xf9, 0xe8, 0x76, 0x83, 0xad, 0x7f, 0x42, 0xf3, 0xb6, 0x3b, 0x9e, 0xff, 0x0b, + 0x00, 0x00, 0xff, 0xff, 0xf7, 0xa9, 0x04, 0x60, 0x69, 0x03, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. @@ -410,6 +492,8 @@ const _ = grpc.SupportPackageIsVersion4 type ProxyServiceClient interface { // DialNode opens a bidrectional stream to the requested node. DialNode(ctx context.Context, opts ...grpc.CallOption) (ProxyService_DialNodeClient, error) + // Ping checks if the peer is reachable and responsive. + Ping(ctx context.Context, in *ProxyServicePingRequest, opts ...grpc.CallOption) (*ProxyServicePingResponse, error) } type proxyServiceClient struct { @@ -451,10 +535,21 @@ func (x *proxyServiceDialNodeClient) Recv() (*Frame, error) { return m, nil } +func (c *proxyServiceClient) Ping(ctx context.Context, in *ProxyServicePingRequest, opts ...grpc.CallOption) (*ProxyServicePingResponse, error) { + out := new(ProxyServicePingResponse) + err := c.cc.Invoke(ctx, "/proto.ProxyService/Ping", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + // ProxyServiceServer is the server API for ProxyService service. type ProxyServiceServer interface { // DialNode opens a bidrectional stream to the requested node. DialNode(ProxyService_DialNodeServer) error + // Ping checks if the peer is reachable and responsive. + Ping(context.Context, *ProxyServicePingRequest) (*ProxyServicePingResponse, error) } // UnimplementedProxyServiceServer can be embedded to have forward compatible implementations. @@ -464,6 +559,9 @@ type UnimplementedProxyServiceServer struct { func (*UnimplementedProxyServiceServer) DialNode(srv ProxyService_DialNodeServer) error { return status.Errorf(codes.Unimplemented, "method DialNode not implemented") } +func (*UnimplementedProxyServiceServer) Ping(ctx context.Context, req *ProxyServicePingRequest) (*ProxyServicePingResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method Ping not implemented") +} func RegisterProxyServiceServer(s *grpc.Server, srv ProxyServiceServer) { s.RegisterService(&_ProxyService_serviceDesc, srv) @@ -495,10 +593,33 @@ func (x *proxyServiceDialNodeServer) Recv() (*Frame, error) { return m, nil } +func _ProxyService_Ping_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ProxyServicePingRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ProxyServiceServer).Ping(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/proto.ProxyService/Ping", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ProxyServiceServer).Ping(ctx, req.(*ProxyServicePingRequest)) + } + return interceptor(ctx, in, info, handler) +} + var _ProxyService_serviceDesc = grpc.ServiceDesc{ ServiceName: "proto.ProxyService", HandlerType: (*ProxyServiceServer)(nil), - Methods: []grpc.MethodDesc{}, + Methods: []grpc.MethodDesc{ + { + MethodName: "Ping", + Handler: _ProxyService_Ping_Handler, + }, + }, Streams: []grpc.StreamDesc{ { StreamName: "DialNode", @@ -776,6 +897,60 @@ func (m *ConnectionEstablished) MarshalToSizedBuffer(dAtA []byte) (int, error) { return len(dAtA) - i, nil } +func (m *ProxyServicePingRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ProxyServicePingRequest) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ProxyServicePingRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + return len(dAtA) - i, nil +} + +func (m *ProxyServicePingResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ProxyServicePingResponse) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ProxyServicePingResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + return len(dAtA) - i, nil +} + func encodeVarintProxyservice(dAtA []byte, offset int, v uint64) int { offset -= sovProxyservice(v) base := offset @@ -914,6 +1089,30 @@ func (m *ConnectionEstablished) Size() (n int) { return n } +func (m *ProxyServicePingRequest) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *ProxyServicePingResponse) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + func sovProxyservice(x uint64) (n int) { return (math_bits.Len64(x|1) + 6) / 7 } @@ -1514,6 +1713,108 @@ func (m *ConnectionEstablished) Unmarshal(dAtA []byte) error { } return nil } +func (m *ProxyServicePingRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProxyservice + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ProxyServicePingRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ProxyServicePingRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := skipProxyservice(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthProxyservice + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ProxyServicePingResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProxyservice + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ProxyServicePingResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ProxyServicePingResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := skipProxyservice(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthProxyservice + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} func skipProxyservice(dAtA []byte) (n int, err error) { l := len(dAtA) iNdEx := 0 diff --git a/api/proto/teleport/legacy/client/proto/proxyservice.proto b/api/proto/teleport/legacy/client/proto/proxyservice.proto index 0cabcc6f2c8e8..30940ef7c932e 100644 --- a/api/proto/teleport/legacy/client/proto/proxyservice.proto +++ b/api/proto/teleport/legacy/client/proto/proxyservice.proto @@ -24,6 +24,9 @@ option go_package = "github.com/gravitational/teleport/api/client/proto"; service ProxyService { // DialNode opens a bidrectional stream to the requested node. rpc DialNode(stream Frame) returns (stream Frame); + + // Ping checks if the peer is reachable and responsive. + rpc Ping(ProxyServicePingRequest) returns (ProxyServicePingResponse); } // Frame wraps different message types to be sent over a stream. @@ -63,3 +66,7 @@ message Data { // ConnectionEstablished signals to the client a connection to the node has been established. message ConnectionEstablished {} + +message ProxyServicePingRequest {} + +message ProxyServicePingResponse {} diff --git a/lib/proxy/peer/client.go b/lib/proxy/peer/client.go index f9e9311088d6c..c84e31164001a 100644 --- a/lib/proxy/peer/client.go +++ b/lib/proxy/peer/client.go @@ -169,8 +169,10 @@ type grpcClientConn struct { cc *grpc.ClientConn metrics *clientMetrics - id string - addr string + id string + addr string + host string + group string // if closing is set, count is not allowed to increase from zero; upon // reaching zero, cond should be broadcast @@ -178,6 +180,8 @@ type grpcClientConn struct { cond sync.Cond closing bool count int + + pingCancel context.CancelFunc } var _ internal.ClientConn = (*grpcClientConn)(nil) @@ -211,7 +215,7 @@ func (c *grpcClientConn) maybeAcquire() (release func()) { // Shutdown implements [internal.ClientConn]. func (c *grpcClientConn) Shutdown(ctx context.Context) { - defer c.cc.Close() + defer c.Close() c.mu.Lock() defer c.mu.Unlock() @@ -232,9 +236,25 @@ func (c *grpcClientConn) Shutdown(ctx context.Context) { // Close implements [internal.ClientConn]. func (c *grpcClientConn) Close() error { + c.pingCancel() return c.cc.Close() } +// Ping implements [internal.ClientConn]. +func (c *grpcClientConn) Ping(ctx context.Context) error { + release := c.maybeAcquire() + if release == nil { + return trace.ConnectionProblem(nil, "error starting stream: connection is shutting down") + } + defer release() + + _, err := clientapi.NewProxyServiceClient(c.cc).Ping(ctx, new(clientapi.ProxyServicePingRequest)) + if trace.IsNotImplemented(err) { + err = nil + } + return trace.Wrap(err) +} + // Dial implements [internal.ClientConn]. func (c *grpcClientConn) Dial( nodeID string, @@ -461,7 +481,14 @@ func (c *Client) updateConnections(proxies []types.Server) error { // establish new connections supportsQUIC, _ := proxy.GetLabel(types.UnstableProxyPeerQUICLabel) - conn, err := c.connect(id, proxy.GetPeerAddr(), supportsQUIC == "yes") + proxyGroup, _ := proxy.GetLabel(types.ProxyGroupIDLabel) + conn, err := c.connect(connectParams{ + peerID: id, + peerAddr: proxy.GetPeerAddr(), + peerHost: proxy.GetHostname(), + peerGroup: proxyGroup, + supportsQUIC: supportsQUIC == "yes", + }) if err != nil { c.metrics.reportTunnelError(errorProxyPeerTunnelDial) c.config.Log.DebugContext(c.ctx, "error dialing peer proxy", "peer_id", id, "peer_addr", proxy.GetPeerAddr()) @@ -662,7 +689,14 @@ func (c *Client) getConnections(proxyIDs []string) ([]internal.ClientConn, bool, } supportsQUIC, _ := proxy.GetLabel(types.UnstableProxyPeerQUICLabel) - conn, err := c.connect(id, proxy.GetPeerAddr(), supportsQUIC == "yes") + proxyGroup, _ := proxy.GetLabel(types.ProxyGroupIDLabel) + conn, err := c.connect(connectParams{ + peerID: id, + peerAddr: proxy.GetPeerAddr(), + peerHost: proxy.GetHostname(), + peerGroup: proxyGroup, + supportsQUIC: supportsQUIC == "yes", + }) if err != nil { c.metrics.reportTunnelError(errorProxyPeerTunnelDirectDial) c.config.Log.DebugContext(c.ctx, "error direct dialing peer proxy", "peer_id", id, "peer_addr", proxy.GetPeerAddr()) @@ -689,9 +723,17 @@ func (c *Client) getConnections(proxyIDs []string) ([]internal.ClientConn, bool, return conns, false, nil } -// connect dials a new connection to proxyAddr. -func (c *Client) connect(peerID string, peerAddr string, supportsQUIC bool) (internal.ClientConn, error) { - if supportsQUIC && c.config.QUICTransport != nil { +type connectParams struct { + peerID string + peerAddr string + peerHost string + peerGroup string + supportsQUIC bool +} + +// connect dials a new connection to a peer proxy with the given ID and address. +func (c *Client) connect(params connectParams) (internal.ClientConn, error) { + if params.supportsQUIC && c.config.QUICTransport != nil { panic("QUIC proxy peering is not implemented") } tlsConfig := utils.TLSConfig(c.config.TLSCipherSuites) @@ -706,11 +748,11 @@ func (c *Client) connect(peerID string, peerAddr string, supportsQUIC bool) (int tlsConfig.InsecureSkipVerify = true tlsConfig.VerifyConnection = utils.VerifyConnectionWithRoots(c.config.GetTLSRoots) - expectedPeer := authclient.HostFQDN(peerID, c.config.ClusterName) + expectedPeer := authclient.HostFQDN(params.peerID, c.config.ClusterName) conn, err := grpc.Dial( - peerAddr, - grpc.WithTransportCredentials(newClientCredentials(expectedPeer, peerAddr, c.config.Log, credentials.NewTLS(tlsConfig))), + params.peerAddr, + grpc.WithTransportCredentials(newClientCredentials(expectedPeer, params.peerAddr, c.config.Log, credentials.NewTLS(tlsConfig))), grpc.WithStatsHandler(newStatsHandler(c.reporter)), grpc.WithChainStreamInterceptor(metadata.StreamClientInterceptor, interceptors.GRPCClientStreamErrorInterceptor), grpc.WithKeepaliveParams(keepalive.ClientParameters{ @@ -721,14 +763,29 @@ func (c *Client) connect(peerID string, peerAddr string, supportsQUIC bool) (int grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy":"round_robin"}`), ) if err != nil { - return nil, trace.Wrap(err, "Error dialing proxy %q", peerID) + return nil, trace.Wrap(err, "Error dialing proxy %q", params.peerID) } - return &grpcClientConn{ + pingCtx, pingCancel := context.WithCancel(context.Background()) + cc := &grpcClientConn{ cc: conn, metrics: c.metrics, - id: peerID, - addr: peerAddr, - }, nil + id: params.peerID, + addr: params.peerAddr, + host: params.peerHost, + group: params.peerGroup, + + pingCancel: pingCancel, + } + + pings, pingFailures := internal.ClientPingsMetrics(internal.ClientPingsMetricsParams{ + LocalID: c.config.ID, + PeerID: params.peerID, + PeerHost: params.peerHost, + PeerGroup: params.peerGroup, + }) + go internal.RunClientPing(pingCtx, cc, pings, pingFailures) + + return cc, nil } diff --git a/lib/proxy/peer/client_test.go b/lib/proxy/peer/client_test.go index 8bdc70946be09..03e472b4078db 100644 --- a/lib/proxy/peer/client_test.go +++ b/lib/proxy/peer/client_test.go @@ -145,7 +145,13 @@ func TestCAChange(t *testing.T) { // dial server and send a test data frame const supportsQUICFalse = false - conn, err := client.connect("s1", ts.GetPeerAddr(), supportsQUICFalse) + conn, err := client.connect(connectParams{ + peerID: "s1", + peerAddr: ts.GetPeerAddr(), + peerHost: "s1", + peerGroup: "", + supportsQUIC: supportsQUICFalse, + }) require.NoError(t, err) require.NotNil(t, conn) require.IsType(t, (*grpcClientConn)(nil), conn) @@ -163,7 +169,13 @@ func TestCAChange(t *testing.T) { // new connection should fail because client tls config still references old // RootCAs. - conn, err = client.connect("s1", ts.GetPeerAddr(), supportsQUICFalse) + conn, err = client.connect(connectParams{ + peerID: "s1", + peerAddr: ts.GetPeerAddr(), + peerHost: "s1", + peerGroup: "", + supportsQUIC: supportsQUICFalse, + }) require.NoError(t, err) require.NotNil(t, conn) require.IsType(t, (*grpcClientConn)(nil), conn) @@ -175,7 +187,13 @@ func TestCAChange(t *testing.T) { // RootCAs. currentServerCA.Store(newServerCA) - conn, err = client.connect("s1", ts.GetPeerAddr(), supportsQUICFalse) + conn, err = client.connect(connectParams{ + peerID: "s1", + peerAddr: ts.GetPeerAddr(), + peerHost: "s1", + peerGroup: "", + supportsQUIC: supportsQUICFalse, + }) require.NoError(t, err) require.NotNil(t, conn) require.IsType(t, (*grpcClientConn)(nil), conn) diff --git a/lib/proxy/peer/helpers_test.go b/lib/proxy/peer/helpers_test.go index 8880edf428021..7f1c50fe28f6a 100644 --- a/lib/proxy/peer/helpers_test.go +++ b/lib/proxy/peer/helpers_test.go @@ -57,6 +57,7 @@ type mockProxyAccessPoint struct { } type mockProxyService struct { + clientapi.UnimplementedProxyServiceServer mockDialNode func(stream clientapi.ProxyService_DialNodeServer) error } diff --git a/lib/proxy/peer/internal/clientconn.go b/lib/proxy/peer/internal/clientconn.go index f44e64afd7b52..b4fa423f07c03 100644 --- a/lib/proxy/peer/internal/clientconn.go +++ b/lib/proxy/peer/internal/clientconn.go @@ -40,6 +40,9 @@ type ClientConn interface { tunnelType types.TunnelType, ) (net.Conn, error) + // Ping checks if the peer is reachable and responsive. + Ping(context.Context) error + // Close closes all connections and releases any background resources // immediately. Close() error diff --git a/lib/proxy/peer/internal/metrics.go b/lib/proxy/peer/internal/metrics.go new file mode 100644 index 0000000000000..802d33746f774 --- /dev/null +++ b/lib/proxy/peer/internal/metrics.go @@ -0,0 +1,109 @@ +// Teleport +// Copyright (C) 2024 Gravitational, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package internal + +import ( + "context" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/gravitational/teleport" + "github.com/gravitational/teleport/api/utils/retryutils" + "github.com/gravitational/teleport/lib/utils/interval" +) + +var ( + clientPingInitOnce sync.Once + + clientPingsTotal *prometheus.CounterVec + clientFailedPingsTotal *prometheus.CounterVec +) + +func clientPingInit() { + clientPingsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: "proxy_peer_client", + Name: "pings_total", + Help: "Total number of proxy peering client pings per peer proxy, both successful and failed.", + }, []string{"local_id", "peer_id", "peer_host", "peer_group"}) + + clientFailedPingsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: "proxy_peer_client", + Name: "failed_pings_total", + Help: "Total number of failed proxy peering client pings per peer proxy.", + }, []string{"local_id", "peer_id", "peer_host", "peer_group"}) +} + +// ClientPingsMetricsParams contains the parameters for [ClientPingsMetrics]. +type ClientPingsMetricsParams struct { + // LocalID is the host ID of the current proxy. + LocalID string + // PeerID is the host ID of the peer proxy. + PeerID string + // PeerHost is the hostname of the peer proxy. + PeerHost string + // PeerGroup is the peer group ID of the peer proxy. Can be blank. + PeerGroup string +} + +// ClientPingsMetrics returns the Prometheus metrics for a given peer proxy, +// given host ID, hostname and (optionally) peer group. +func ClientPingsMetrics(params ClientPingsMetricsParams) (pings, failedPings prometheus.Counter) { + clientPingInitOnce.Do(clientPingInit) + + pings = clientPingsTotal.WithLabelValues(params.LocalID, params.PeerID, params.PeerHost, params.PeerGroup) + failedPings = clientFailedPingsTotal.WithLabelValues(params.LocalID, params.PeerID, params.PeerHost, params.PeerGroup) + + return pings, failedPings +} + +// RunClientPing periodically pings the peer proxy reachable through the given +// [ClientConn], accumulating counts in the given Prometheus metrics. Returns +// when the context is canceled. +func RunClientPing(ctx context.Context, cc ClientConn, pings, failedPings prometheus.Counter) { + const pingInterval = time.Minute + ivl := interval.New(interval.Config{ + Duration: pingInterval * 14 / 13, + FirstDuration: retryutils.HalfJitter(pingInterval), + Jitter: retryutils.SeventhJitter, + }) + defer ivl.Stop() + + for ctx.Err() == nil { + select { + case <-ivl.Next(): + func() { + timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + err := cc.Ping(timeoutCtx) + if err != nil { + if ctx.Err() != nil { + return + } + failedPings.Inc() + } + pings.Inc() + }() + case <-ctx.Done(): + } + } +} diff --git a/lib/proxy/peer/service.go b/lib/proxy/peer/service.go index 31c08b6a7d66d..db5a93c635921 100644 --- a/lib/proxy/peer/service.go +++ b/lib/proxy/peer/service.go @@ -19,6 +19,7 @@ package peer import ( + "context" "log/slog" "strings" @@ -106,6 +107,10 @@ func (s *proxyService) DialNode(stream proto.ProxyService_DialNodeServer) error return trace.Wrap(err) } +func (s *proxyService) Ping(ctx context.Context, _ *proto.ProxyServicePingRequest) (*proto.ProxyServicePingResponse, error) { + return new(proto.ProxyServicePingResponse), nil +} + // splitServerID splits a server id in to a node id and cluster name. func splitServerID(address string) (string, string, error) { split := strings.Split(address, ".")