Skip to content

Commit

Permalink
Merge pull request #9135 from sridhartigera/netlink-err-handling
Browse files Browse the repository at this point in the history
[Felix] Add retries when netlink list APIs encounter EINTR.
  • Loading branch information
sridhartigera authored Aug 15, 2024
2 parents 36efc94 + 4da6094 commit 05f32ea
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 11 deletions.
2 changes: 2 additions & 0 deletions felix/dataplane/linux/bpf_ep_mgr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ func (m *mockDataplane) loadDefaultPolicies() error {
}

func (m *mockDataplane) ensureProgramAttached(ap attachPoint) (qDiscInfo, error) {
m.mutex.Lock()
defer m.mutex.Unlock()
var qdisc qDiscInfo
key := ap.IfaceName() + ":" + ap.HookName().String()
m.numAttaches[key] = m.numAttaches[key] + 1
Expand Down
12 changes: 10 additions & 2 deletions felix/dataplane/linux/int_dataplane.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import (
"github.com/projectcalico/calico/felix/jitter"
"github.com/projectcalico/calico/felix/labelindex"
"github.com/projectcalico/calico/felix/logutils"
"github.com/projectcalico/calico/felix/netlinkshim"
"github.com/projectcalico/calico/felix/nftables"
"github.com/projectcalico/calico/felix/proto"
"github.com/projectcalico/calico/felix/routerule"
Expand Down Expand Up @@ -1106,9 +1107,16 @@ func NewIntDataplaneDriver(config Config) *InternalDataplane {
// findHostMTU auto-detects the smallest host interface MTU.
func findHostMTU(matchRegex *regexp.Regexp) (int, error) {
// Find all the interfaces on the host.
links, err := netlink.LinkList()

nlHandle, err := netlinkshim.NewRealNetlink()
if err != nil {
log.WithError(err).Error("Failed to create netlink handle. Unable to auto-detect MTU.")
return 0, err
}

links, err := nlHandle.LinkList()
if err != nil {
log.WithError(err).Error("Failed to list interfaces. Unable to auto-detect MTU")
log.WithError(err).Error("Failed to list interfaces. Unable to auto-detect MTU.")
return 0, err
}

Expand Down
12 changes: 11 additions & 1 deletion felix/dataplane/linux/ipip_mgr_netlink.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
package intdataplane

import (
"errors"
"os/exec"

"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)

// ipipDataplane is a shim interface for mocking netlink and os/exec in the IPIP manager.
Expand Down Expand Up @@ -45,7 +47,15 @@ func (r realIPIPNetlink) LinkSetUp(link netlink.Link) error {
}

// AddrList lists the addresses on link for the given family using the
// package-level netlink API, retrying when the call is interrupted.
//
// A result whose error matches unix.EINTR is retried up to three times; any
// other outcome, or an EINTR once the retries are used up, is returned as-is
// together with whatever addresses the last call produced.
func (r realIPIPNetlink) AddrList(link netlink.Link, family int) ([]netlink.Addr, error) {
	// The retry loop must be the only path out of this method: an
	// unconditional return ahead of it would leave the loop unreachable.
	retries := 3
	for {
		addrs, err := netlink.AddrList(link, family)
		if errors.Is(err, unix.EINTR) && retries > 0 {
			retries--
			continue
		}
		return addrs, err
	}
}

func (r realIPIPNetlink) AddrAdd(link netlink.Link, addr *netlink.Addr) error {
Expand Down
3 changes: 2 additions & 1 deletion felix/dataplane/linux/vxlan_mgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/projectcalico/calico/felix/ip"
"github.com/projectcalico/calico/felix/ipsets"
"github.com/projectcalico/calico/felix/logutils"
"github.com/projectcalico/calico/felix/netlinkshim"
"github.com/projectcalico/calico/felix/proto"
"github.com/projectcalico/calico/felix/routetable"
"github.com/projectcalico/calico/felix/rules"
Expand Down Expand Up @@ -103,7 +104,7 @@ func newVXLANManager(
opRecorder logutils.OpRecorder,
ipVersion uint8,
) *vxlanManager {
nlHandle, _ := netlink.NewHandle(syscall.NETLINK_ROUTE)
nlHandle, _ := netlinkshim.NewRealNetlink()
return newVXLANManagerWithShims(
ipsetsDataplane,
mainRouteTable,
Expand Down
5 changes: 2 additions & 3 deletions felix/fv/etcd_restart_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,12 @@ import (
. "github.com/onsi/gomega"
"github.com/sirupsen/logrus"

"github.com/vishvananda/netlink"

api "github.com/projectcalico/api/pkg/apis/projectcalico/v3"

"github.com/projectcalico/calico/felix/fv/containers"
"github.com/projectcalico/calico/felix/fv/infrastructure"
"github.com/projectcalico/calico/felix/fv/metrics"
"github.com/projectcalico/calico/felix/fv/netlinkutils"
"github.com/projectcalico/calico/felix/fv/utils"
"github.com/projectcalico/calico/felix/fv/workload"
client "github.com/projectcalico/calico/libcalico-go/lib/clientv3"
Expand All @@ -60,7 +59,7 @@ var _ = Context("etcd connection interruption", func() {
// Wait until the tunl0 device appears; it is created when felix inserts the ipip module
// into the kernel.
Eventually(func() error {
links, err := netlink.LinkList()
links, err := netlinkutils.LinkListRetryEINTR()
if err != nil {
return err
}
Expand Down
4 changes: 2 additions & 2 deletions felix/fv/ipip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"github.com/projectcalico/calico/felix/fv/utils"

log "github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"

api "github.com/projectcalico/api/pkg/apis/projectcalico/v3"
"github.com/projectcalico/api/pkg/lib/numorstring"
Expand All @@ -45,6 +44,7 @@ import (

"github.com/projectcalico/calico/felix/fv/containers"
"github.com/projectcalico/calico/felix/fv/infrastructure"
"github.com/projectcalico/calico/felix/fv/netlinkutils"
"github.com/projectcalico/calico/felix/fv/workload"
)

Expand Down Expand Up @@ -72,7 +72,7 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ IPIP topology before adding
// Wait until the tunl0 device appears; it is created when felix inserts the ipip module
// into the kernel.
Eventually(func() error {
links, err := netlink.LinkList()
links, err := netlinkutils.LinkListRetryEINTR()
if err != nil {
return err
}
Expand Down
36 changes: 36 additions & 0 deletions felix/fv/netlinkutils/netlinkutils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (c) 2024 Tigera, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netlinkutils

import (
log "github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"

"github.com/projectcalico/calico/felix/netlinkshim"
)

// LinkListRetryEINTR lists the host's links through a handle obtained from
// netlinkshim.NewRealNetlink, so the listing goes through that package's
// LinkList implementation rather than the bare netlink API.
//
// On failure to create the handle it logs the error and returns an empty
// slice with the error. On failure to list it logs the error and returns
// whatever LinkList returned alongside it.
func LinkListRetryEINTR() ([]netlink.Link, error) {
	nlHandle, err := netlinkshim.NewRealNetlink()
	if err != nil {
		log.WithError(err).Error("Failed to create netlink handle. Unable to list interfaces")
		return []netlink.Link{}, err
	}
	// A new handle is created on every call and this helper is meant to be
	// polled, so tear the handle down before returning instead of leaving it
	// open. The assertion keeps this compiling whether or not Delete is part
	// of the netlinkshim interface; the concrete RealNetlink type provides it.
	if closer, ok := nlHandle.(interface{ Delete() }); ok {
		defer closer.Delete()
	}

	links, err := nlHandle.LinkList()
	if err != nil {
		log.WithError(err).Error("Failed to list interfaces")
	}
	return links, err
}
5 changes: 4 additions & 1 deletion felix/fv/test-workload/test-workload.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package main
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io"
"net"
Expand All @@ -26,6 +27,8 @@ import (
"strings"
"time"

"golang.org/x/sys/unix"

"github.com/projectcalico/calico/felix/fv/cgroup"
"github.com/projectcalico/calico/felix/fv/connectivity"
"github.com/projectcalico/calico/felix/fv/utils"
Expand Down Expand Up @@ -149,7 +152,7 @@ func main() {
// link local address that can be used as a next hop.
// Just fetch the address of the host end of the veth and use it as the next hop.
addresses, err := netlink.AddrList(veth, netlink.FAMILY_V6)
if err != nil {
if err != nil && !errors.Is(err, unix.EINTR) {
log.WithError(err).Panic("Error listing IPv6 addresses for the host side of the veth pair")
}

Expand Down
157 changes: 156 additions & 1 deletion felix/netlinkshim/netlink.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
package netlinkshim

import (
"errors"
"syscall"
"time"

"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)

type Interface interface {
Expand Down Expand Up @@ -47,6 +49,159 @@ type Interface interface {
NeighDel(a *netlink.Neigh) error
}

// RealNetlink wraps a *netlink.Handle. Its methods forward to the handle; the
// list-style methods (LinkList, RouteListFiltered, AddrList, RuleList and
// NeighList) additionally retry when the handle reports EINTR.
type RealNetlink struct {
// nlHandle is the underlying handle that every method delegates to.
nlHandle *netlink.Handle
}

// NewRealNetlink creates a netlink handle for NETLINK_ROUTE and wraps it in a
// RealNetlink, so callers get that type's EINTR-retrying list methods rather
// than the raw handle.
//
// It returns a nil Interface and the underlying error if the handle cannot be
// created.
func NewRealNetlink() (Interface, error) {
	// The handle must be wrapped before it is returned; handing back the raw
	// handle directly would bypass the retry logic on RealNetlink.
	nlHandle, err := netlink.NewHandle(syscall.NETLINK_ROUTE)
	if err != nil {
		return nil, err
	}
	return &RealNetlink{
		nlHandle: nlHandle,
	}, nil
}

// SetSocketTimeout forwards the timeout to the wrapped handle's
// SetSocketTimeout and returns its error unchanged.
func (r *RealNetlink) SetSocketTimeout(to time.Duration) error {
return r.nlHandle.SetSocketTimeout(to)
}

// SetStrictCheck forwards the flag to the wrapped handle's SetStrictCheck and
// returns its error unchanged.
func (r *RealNetlink) SetStrictCheck(b bool) error {
return r.nlHandle.SetStrictCheck(b)
}

// LinkList lists links via the wrapped handle, repeating the call while it
// fails with EINTR.
//
// At most one initial attempt plus three retries are made. The links and
// error from the final attempt are returned, so a persistent EINTR is still
// surfaced to the caller.
func (r *RealNetlink) LinkList() ([]netlink.Link, error) {
	const maxRetries = 3

	var (
		result []netlink.Link
		err    error
	)
	for attempt := 0; attempt <= maxRetries; attempt++ {
		result, err = r.nlHandle.LinkList()
		if !errors.Is(err, unix.EINTR) {
			// Success or a non-retryable error: stop here.
			break
		}
	}
	return result, err
}

// LinkByName forwards to the wrapped handle's LinkByName and returns its
// result and error unchanged. No retry is applied.
func (r *RealNetlink) LinkByName(name string) (netlink.Link, error) {
return r.nlHandle.LinkByName(name)
}

// LinkAdd forwards to the wrapped handle's LinkAdd and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) LinkAdd(link netlink.Link) error {
return r.nlHandle.LinkAdd(link)
}

// LinkDel forwards to the wrapped handle's LinkDel and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) LinkDel(link netlink.Link) error {
return r.nlHandle.LinkDel(link)
}

// LinkSetMTU forwards the link and MTU to the wrapped handle's LinkSetMTU and
// returns its error unchanged. No retry is applied.
func (r *RealNetlink) LinkSetMTU(link netlink.Link, mtu int) error {
return r.nlHandle.LinkSetMTU(link, mtu)
}

// LinkSetUp forwards to the wrapped handle's LinkSetUp and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) LinkSetUp(link netlink.Link) error {
return r.nlHandle.LinkSetUp(link)
}

// RouteListFiltered lists routes via the wrapped handle, passing family,
// filter and filterMask through unchanged, and repeats the call while it
// fails with EINTR.
//
// At most one initial attempt plus three retries are made. The routes and
// error from the final attempt are returned, so a persistent EINTR is still
// surfaced to the caller.
func (r *RealNetlink) RouteListFiltered(family int, filter *netlink.Route, filterMask uint64) ([]netlink.Route, error) {
	const maxRetries = 3

	var (
		result []netlink.Route
		err    error
	)
	for attempt := 0; attempt <= maxRetries; attempt++ {
		result, err = r.nlHandle.RouteListFiltered(family, filter, filterMask)
		if !errors.Is(err, unix.EINTR) {
			// Success or a non-retryable error: stop here.
			break
		}
	}
	return result, err
}

// RouteAdd forwards to the wrapped handle's RouteAdd and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) RouteAdd(route *netlink.Route) error {
return r.nlHandle.RouteAdd(route)
}

// RouteReplace forwards to the wrapped handle's RouteReplace and returns its
// error unchanged. No retry is applied.
func (r *RealNetlink) RouteReplace(route *netlink.Route) error {
return r.nlHandle.RouteReplace(route)
}

// RouteDel forwards to the wrapped handle's RouteDel and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) RouteDel(route *netlink.Route) error {
return r.nlHandle.RouteDel(route)
}

// AddrList lists the addresses on link for the given family via the wrapped
// handle, repeating the call while it fails with EINTR.
//
// At most one initial attempt plus three retries are made. The addresses and
// error from the final attempt are returned, so a persistent EINTR is still
// surfaced to the caller.
func (r *RealNetlink) AddrList(link netlink.Link, family int) ([]netlink.Addr, error) {
	const maxRetries = 3

	var (
		result []netlink.Addr
		err    error
	)
	for attempt := 0; attempt <= maxRetries; attempt++ {
		result, err = r.nlHandle.AddrList(link, family)
		if !errors.Is(err, unix.EINTR) {
			// Success or a non-retryable error: stop here.
			break
		}
	}
	return result, err
}

// AddrAdd forwards the link and address to the wrapped handle's AddrAdd and
// returns its error unchanged. No retry is applied.
func (r *RealNetlink) AddrAdd(link netlink.Link, addr *netlink.Addr) error {
return r.nlHandle.AddrAdd(link, addr)
}

// AddrDel forwards the link and address to the wrapped handle's AddrDel and
// returns its error unchanged. No retry is applied.
func (r *RealNetlink) AddrDel(link netlink.Link, addr *netlink.Addr) error {
return r.nlHandle.AddrDel(link, addr)
}

// RuleList lists rules for the given family via the wrapped handle, repeating
// the call while it fails with EINTR.
//
// At most one initial attempt plus three retries are made. The rules and
// error from the final attempt are returned, so a persistent EINTR is still
// surfaced to the caller.
func (r *RealNetlink) RuleList(family int) ([]netlink.Rule, error) {
	const maxRetries = 3

	var (
		result []netlink.Rule
		err    error
	)
	for attempt := 0; attempt <= maxRetries; attempt++ {
		result, err = r.nlHandle.RuleList(family)
		if !errors.Is(err, unix.EINTR) {
			// Success or a non-retryable error: stop here.
			break
		}
	}
	return result, err
}

// RuleAdd forwards to the wrapped handle's RuleAdd and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) RuleAdd(rule *netlink.Rule) error {
return r.nlHandle.RuleAdd(rule)
}

// RuleDel forwards to the wrapped handle's RuleDel and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) RuleDel(rule *netlink.Rule) error {
return r.nlHandle.RuleDel(rule)
}

// Delete calls Delete on the wrapped handle. It returns nothing.
func (r *RealNetlink) Delete() {
// The directive below silences staticcheck for the Delete call.
// NOTE(review): presumably a deprecation warning on Handle.Delete; confirm
// against the netlink package before swapping in a replacement call.
//nolint:staticcheck
r.nlHandle.Delete()
}

// NeighAdd forwards to the wrapped handle's NeighAdd and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) NeighAdd(neigh *netlink.Neigh) error {
return r.nlHandle.NeighAdd(neigh)
}

// NeighList lists neighbour entries for the given link index and family via
// the wrapped handle, repeating the call while it fails with EINTR.
//
// At most one initial attempt plus three retries are made. The entries and
// error from the final attempt are returned, so a persistent EINTR is still
// surfaced to the caller.
func (r *RealNetlink) NeighList(linkIndex, family int) ([]netlink.Neigh, error) {
	const maxRetries = 3

	var (
		result []netlink.Neigh
		err    error
	)
	for attempt := 0; attempt <= maxRetries; attempt++ {
		result, err = r.nlHandle.NeighList(linkIndex, family)
		if !errors.Is(err, unix.EINTR) {
			// Success or a non-retryable error: stop here.
			break
		}
	}
	return result, err
}

// NeighSet forwards to the wrapped handle's NeighSet and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) NeighSet(a *netlink.Neigh) error {
return r.nlHandle.NeighSet(a)
}

// NeighDel forwards to the wrapped handle's NeighDel and returns its error
// unchanged. No retry is applied.
func (r *RealNetlink) NeighDel(a *netlink.Neigh) error {
return r.nlHandle.NeighDel(a)
}

0 comments on commit 05f32ea

Please sign in to comment.