diff --git a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go index 00548593c..ddd989353 100644 --- a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go +++ b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go @@ -53,6 +53,7 @@ import ( const ( podResourcesClientTimeout = 10 * time.Second + getTopologyZonesTimeout = 10 * time.Second podResourcesClientMaxMsgSize = 1024 * 1024 * 16 ) @@ -139,6 +140,8 @@ func (p *topologyAdapterImpl) GetTopologyZones(parentCtx context.Context) ([]*no // always force getting pod list instead of cache ctx := context.WithValue(parentCtx, metaserverpod.BypassCacheKey, metaserverpod.BypassCacheTrue) + ctx, cancel := context.WithTimeout(ctx, getTopologyZonesTimeout) + defer cancel() podList, err := p.metaServer.GetPodList(ctx, nil) if err != nil { return nil, errors.Wrap(err, "get pod list from metaServer failed") diff --git a/pkg/agent/resourcemanager/fetcher/plugin/endpoint.go b/pkg/agent/resourcemanager/fetcher/plugin/endpoint.go index a4b4c0b11..eb6e1c399 100644 --- a/pkg/agent/resourcemanager/fetcher/plugin/endpoint.go +++ b/pkg/agent/resourcemanager/fetcher/plugin/endpoint.go @@ -33,6 +33,7 @@ import ( const ( dialRemoteEndpointTimeout = 10 * time.Second + getReportContentTimeout = 10 * time.Second ) // ListAndWatchCallback should be called when plugins report info update. @@ -173,7 +174,9 @@ func (e *remoteEndpointImpl) GetReportContent(c context.Context) (*v1alpha1.GetR return nil, fmt.Errorf("endpoint %v has been stopped", e.pluginName) } - resp, err := e.client.GetReportContent(c, &v1alpha1.Empty{}) + ctx, cancel := context.WithTimeout(c, getReportContentTimeout) + defer cancel() + resp, err := e.client.GetReportContent(ctx, &v1alpha1.Empty{}) if err == nil { e.setCache(resp) } diff --git a/pkg/agent/sysadvisor/plugin/overcommitmentaware/reporter/reporter.go b/pkg/agent/sysadvisor/plugin/overcommitmentaware/reporter/reporter.go index 05e921c4e..292a9ee73 100644 --- a/pkg/agent/sysadvisor/plugin/overcommitmentaware/reporter/reporter.go +++ b/pkg/agent/sysadvisor/plugin/overcommitmentaware/reporter/reporter.go @@ -153,7 +153,7 @@ func (o *OvercommitRatioReporterPlugin) Stop() error { // GetReportContent get overcommitment ratio from manager directly. // Since the metrics collected by Manager are already an average within a time period, // we expect a faster response to node load fluctuations to avoid excessive overcommit of online resources. -func (o *OvercommitRatioReporterPlugin) GetReportContent(_ context.Context, _ *v1alpha1.Empty) (*v1alpha1.GetReportContentResponse, error) { +func (o *OvercommitRatioReporterPlugin) GetReportContent(ctx context.Context, _ *v1alpha1.Empty) (*v1alpha1.GetReportContentResponse, error) { response := &v1alpha1.GetReportContentResponse{ Content: []*v1alpha1.ReportContent{}, } @@ -173,7 +173,7 @@ func (o *OvercommitRatioReporterPlugin) GetReportContent(_ context.Context, _ *v response.Content = append(response.Content, overcommitRatioContent) // get topologyProvider and guaranteed cpus - topologyProviderContent, err := o.getTopologyProviderReportContent(o.ctx) + topologyProviderContent, err := o.getTopologyProviderReportContent(ctx) if err != nil { return nil, err }