diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 04c4d7a4bd83f..078e3aa04383e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -12641,6 +12641,12 @@ static inline void mlxsw_reg_tidem_pack(char *payload, u8 underlay_ecn, MLXSW_REG_DEFINE(sbpr, MLXSW_REG_SBPR_ID, MLXSW_REG_SBPR_LEN); +/* reg_sbpr_desc + * When set, configures descriptor buffer. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpr, desc, 0x00, 31, 1); + /* shared direstion enum for SBPR, SBCM, SBPM */ enum mlxsw_reg_sbxx_dir { MLXSW_REG_SBXX_DIR_INGRESS, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 98f26f596e30e..c68fc8f7ca99e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -202,6 +202,21 @@ static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, return 0; } +static int mlxsw_sp_sb_pr_desc_write(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_sbxx_dir dir, + enum mlxsw_reg_sbpr_mode mode, + u32 size, bool infi_size) +{ + char sbpr_pl[MLXSW_REG_SBPR_LEN]; + + /* The FW default descriptor buffer configuration uses only pool 14 for + * descriptors. + */ + mlxsw_reg_sbpr_pack(sbpr_pl, 14, dir, mode, size, infi_size); + mlxsw_reg_sbpr_desc_set(sbpr_pl, true); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); +} + static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u16 local_port, u8 pg_buff, u32 min_buff, u32 max_buff, bool infi_max, u16 pool_index) @@ -775,6 +790,17 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; } + + err = mlxsw_sp_sb_pr_desc_write(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS, + MLXSW_REG_SBPR_MODE_DYNAMIC, 0, true); + if (err) + return err; + + err = mlxsw_sp_sb_pr_desc_write(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS, + MLXSW_REG_SBPR_MODE_DYNAMIC, 0, true); + if (err) + return err; + return 0; } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh new file mode 100755 index 0000000000000..82a47b903f928 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh @@ -0,0 +1,480 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sends 1Gbps of traffic through the switch, into which it then +# injects a burst of traffic and tests that there are no drops. +# +# The 1Gbps stream is created by sending >1Gbps stream from H1. This stream +# ingresses through $swp1, and is forwarded thtrough a small temporary pool to a +# 1Gbps $swp3. +# +# Thus a 1Gbps stream enters $swp4, and is forwarded through a large pool to +# $swp2, and eventually to H2. Since $swp2 is a 1Gbps port as well, no backlog +# is generated. +# +# At this point, a burst of traffic is forwarded from H3. This enters $swp5, is +# forwarded to $swp2, which is fully subscribed by the 1Gbps stream. The +# expectation is that the burst is wholly absorbed by the large pool and no +# drops are caused. After the burst, there should be a backlog that is hard to +# get rid of, because $sw2 is fully subscribed. But because each individual +# packet is scheduled soon after getting enqueued, SLL and HLL do not impact the +# test. +# +# +-----------------------+ +-----------------------+ +# | H1 | | H3 | +# | + $h1.111 | | $h3.111 + | +# | | 192.0.2.33/28 | | 192.0.2.35/28 | | +# | | | | | | +# | + $h1 | | $h3 + | +# +---|-------------------+ +--------------------+ +------------------|----+ +# | | | | +# +---|----------------------|--------------------|----------------------|----+ +# | + $swp1 $swp3 + + $swp4 $swp5 | | +# | | iPOOL1 iPOOL0 | | iPOOL2 iPOOL2 | | +# | | ePOOL4 ePOOL5 | | ePOOL4 ePOOL4 | | +# | | 1Gbps | | 1Gbps | | +# | +-|----------------------|-+ +-|----------------------|-+ | +# | | + $swp1.111 $swp3.111 + | | + $swp4.111 $swp5.111 + | | +# | | | | | | +# | | BR1 | | BR2 | | +# | | | | | | +# | | | | + $swp2.111 | | +# | +--------------------------+ +---------|----------------+ | +# | | | +# | iPOOL0: 500KB dynamic | | +# | iPOOL1: 500KB dynamic | | +# | iPOOL2: 10MB dynamic + $swp2 | +# | ePOOL4: 500KB dynamic | iPOOL0 | +# | ePOOL5: 500KB dnamic | ePOOL6 | +# | ePOOL6: 10MB dynamic | 1Gbps | +# +-------------------------------------------------------|-------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | 1Gbps | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# +# iPOOL0+ePOOL4 are helper pools for control traffic etc. +# iPOOL1+ePOOL5 are helper pools for modeling the 1Gbps stream +# iPOOL2+ePOOL6 are pools for soaking the burst traffic + +ALL_TESTS=" + ping_ipv4 + test_8K + test_800 +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=8 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh +source mlxsw_lib.sh + +_1KB=1000 +_500KB=$((500 * _1KB)) +_1MB=$((1000 * _1KB)) + +# The failure mode that this specifically tests is exhaustion of descriptor +# buffer. The point is to produce a burst that shared buffer should be able +# to accommodate, but produce it with small enough packets that the machine +# runs out of the descriptor buffer space with default configuration. +# +# The machine therefore needs to be able to produce line rate with as small +# packets as possible, and at the same time have large enough buffer that +# when filled with these small packets, it runs out of descriptors. +# Spectrum-2 is very close, but cannot perform this test. Therefore use +# Spectrum-3 as a minimum, and permit larger burst size, and therefore +# larger packets, to reduce spurious failures. +# +mlxsw_only_on_spectrum 3+ || exit + +BURST_SIZE=$((50000000)) +POOL_SIZE=$BURST_SIZE + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 + ip link set dev $h1.111 type vlan egress-qos-map 0:1 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + ethtool -s $h2 speed 1000 autoneg off + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + ethtool -s $h2 autoneg on + mtu_restore $h2 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + mtu_set $h3 10000 + + vlan_create $h3 111 v$h3 192.0.2.35/28 +} + +h3_destroy() +{ + vlan_destroy $h3 111 + + mtu_restore $h3 + simple_if_fini $h3 +} + +switch_create() +{ + # pools + # ----- + + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 5 + devlink_pool_size_thtype_save 2 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_save $swp3 5 + devlink_port_pool_th_save $swp4 2 + devlink_port_pool_th_save $swp5 2 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_save $swp4 1 ingress + devlink_tc_bind_pool_th_save $swp5 1 ingress + + # Control traffic pools. Just reduce the size. + devlink_pool_size_thtype_set 0 dynamic $_500KB + devlink_pool_size_thtype_set 4 dynamic $_500KB + + # Stream modeling pools. + devlink_pool_size_thtype_set 1 dynamic $_500KB + devlink_pool_size_thtype_set 5 dynamic $_500KB + + # Burst soak pools. + devlink_pool_size_thtype_set 2 static $POOL_SIZE + devlink_pool_size_thtype_set 6 static $POOL_SIZE + + # $swp1 + # ----- + + ip link set dev $swp1 up + mtu_set $swp1 10000 + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 16 + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16 + + # Configure qdisc... + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + # ... so that we can assign prio1 traffic to PG1. + dcb buffer set dev $swp1 prio-buffer all:0 1:1 + + # $swp2 + # ----- + + ip link set dev $swp2 up + mtu_set $swp2 10000 + ethtool -s $swp2 speed 1000 autoneg off + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $POOL_SIZE + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $POOL_SIZE + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp2 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # $swp3 + # ----- + + ip link set dev $swp3 up + mtu_set $swp3 10000 + ethtool -s $swp3 speed 1000 autoneg off + vlan_create $swp3 111 + ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp3 5 16 + devlink_tc_bind_pool_th_set $swp3 1 egress 5 16 + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp3 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # $swp4 + # ----- + + ip link set dev $swp4 up + mtu_set $swp4 10000 + ethtool -s $swp4 speed 1000 autoneg off + vlan_create $swp4 111 + ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp4 2 $POOL_SIZE + devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $POOL_SIZE + + # Configure qdisc... + tc qdisc replace dev $swp4 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + # ... so that we can assign prio1 traffic to PG1. + dcb buffer set dev $swp4 prio-buffer all:0 1:1 + + # $swp5 + # ----- + + ip link set dev $swp5 up + mtu_set $swp5 10000 + vlan_create $swp5 111 + ip link set dev $swp5.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp5 2 $POOL_SIZE + devlink_tc_bind_pool_th_set $swp5 1 ingress 2 $POOL_SIZE + + # Configure qdisc... + tc qdisc replace dev $swp5 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + # ... so that we can assign prio1 traffic to PG1. + dcb buffer set dev $swp5 prio-buffer all:0 1:1 + + # bridges + # ------- + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev $swp3.111 master br1 + ip link set dev br1 up + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev $swp2.111 master br2 + ip link set dev $swp4.111 master br2 + ip link set dev $swp5.111 master br2 + ip link set dev br2 up +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. + devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 5 + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 2 + devlink_pool_size_thtype_restore 1 + devlink_pool_size_thtype_restore 0 + + # bridges + # ------- + + ip link set dev br2 down + ip link set dev $swp5.111 nomaster + ip link set dev $swp4.111 nomaster + ip link set dev $swp2.111 nomaster + ip link del dev br2 + + ip link set dev br1 down + ip link set dev $swp3.111 nomaster + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp5 + # ----- + + dcb buffer set dev $swp5 prio-buffer all:0 + tc qdisc del dev $swp5 root + + devlink_tc_bind_pool_th_restore $swp5 1 ingress + devlink_port_pool_th_restore $swp5 2 + + vlan_destroy $swp5 111 + mtu_restore $swp5 + ip link set dev $swp5 down + + # $swp4 + # ----- + + dcb buffer set dev $swp4 prio-buffer all:0 + tc qdisc del dev $swp4 root + + devlink_tc_bind_pool_th_restore $swp4 1 ingress + devlink_port_pool_th_restore $swp4 2 + + vlan_destroy $swp4 111 + ethtool -s $swp4 autoneg on + mtu_restore $swp4 + ip link set dev $swp4 down + + # $swp3 + # ----- + + tc qdisc del dev $swp3 root + + devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_port_pool_th_restore $swp3 5 + + vlan_destroy $swp3 111 + ethtool -s $swp3 autoneg on + mtu_restore $swp3 + ip link set dev $swp3 down + + # $swp2 + # ----- + + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + ethtool -s $swp2 autoneg on + mtu_restore $swp2 + ip link set dev $swp2 down + + # $swp1 + # ----- + + dcb buffer set dev $swp1 prio-buffer all:0 + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + swp4=${NETIFS[p6]} + + swp5=${NETIFS[p7]} + h3=${NETIFS[p8]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 " h1->h2" + ping_test $h3 192.0.2.34 " h3->h2" +} + +__test_qos_burst() +{ + local pktsize=$1; shift + + RET=0 + + start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac + sleep 1 + + local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) + ((q0 == 0)) + check_err $? "Transmit queue non-zero?" + + local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + + local cell_size=$(devlink_cell_size_get) + local cells=$((BURST_SIZE / cell_size)) + # Each packet is $pktsize of payload + headers. + local pkt_cells=$(((pktsize + 50 + cell_size - 1) / cell_size)) + # How many packets can we admit: + local pkts=$((cells / pkt_cells)) + + $MZ $h3 -p $pktsize -Q 1:111 -A 192.0.2.35 -B 192.0.2.34 \ + -a own -b $h2mac -c $pkts -t udp -q + sleep 1 + + local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + ((d1 == d0)) + check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))" + + # Check that the queue is somewhat close to the burst size This + # makes sure that the lack of drops above was not due to port + # undersubscribtion. + local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) + local qe=$((90 * BURST_SIZE / 100)) + ((q0 > qe)) + check_err $? "Queue size expected >$qe, got $q0" + + stop_traffic + sleep 2 + + log_test "Burst: absorb $pkts ${pktsize}-B packets" +} + +test_8K() +{ + __test_qos_burst 8000 +} + +test_800() +{ + __test_qos_burst 800 +} + +bail_on_lldpad + +trap cleanup EXIT +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 5386c826e46aa..66681a2bcdd3f 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1375,25 +1375,40 @@ flood_test() __start_traffic() { + local pktsize=$1; shift local proto=$1; shift local h_in=$1; shift # Where the traffic egresses the host local sip=$1; shift local dip=$1; shift local dmac=$1; shift - $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ + $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \ -a own -b $dmac -t "$proto" -q "$@" & sleep 1 } +start_traffic_pktsize() +{ + local pktsize=$1; shift + + __start_traffic $pktsize udp "$@" +} + +start_tcp_traffic_pktsize() +{ + local pktsize=$1; shift + + __start_traffic $pktsize tcp "$@" +} + start_traffic() { - __start_traffic udp "$@" + start_traffic_pktsize 8000 "$@" } start_tcp_traffic() { - __start_traffic tcp "$@" + start_tcp_traffic_pktsize 8000 "$@" } stop_traffic()