Skip to content

Commit

Permalink
portmgrd: prevent runtime failure in setting MTU on portchannel membe…
Browse files Browse the repository at this point in the history
…r [PR sonic-net#3432]

Do not attempt to set the MTU directly on PortChannel members as it will
likely fail.  The MTU gets inherited as part of the PortChannel.

Signed-off-by: Brad House (@bradh352)
  • Loading branch information
bradh352 committed Jan 15, 2025
1 parent 6b41306 commit e226eb1
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 5 deletions.
64 changes: 60 additions & 4 deletions cfgmgr/portmgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "exec.h"
#include "shellcmd.h"
#include <swss/redisutility.h>
#include <unordered_map>

using namespace std;
using namespace swss;
Expand All @@ -22,7 +23,40 @@ PortMgr::PortMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, c
{
}

bool PortMgr::setPortMtu(const string &alias, const string &mtu)
bool PortMgr::isLagMember(const std::string &alias, std::unordered_map<std::string, int> &lagMembers)
{
/* Lag members are lazily loaded on the first call to isLagMember and cached
* within a variable inside of doTask() for future calls.
*/
if (lagMembers.empty())
{
vector<string> keys;
m_cfgLagMemberTable.getKeys(keys);
for (auto key: keys)
{
auto tokens = tokenize(key, config_db_key_delimiter);
std::string member = tokens[1];
if (!member.empty()) {
lagMembers[member] = 1;
}
}

/* placeholder to state we already read lagmembers even though there are
* none */
if (lagMembers.empty()) {
lagMembers["none"] = 1;
}
}

if (lagMembers.find(alias) != lagMembers.end())
{
return true;
}

return false;
}

bool PortMgr::setPortMtu(const string &alias, const string &mtu, std::unordered_map<std::string, int> &lagMembers)
{
stringstream cmd;
string res, cmd_str;
Expand All @@ -43,6 +77,12 @@ bool PortMgr::setPortMtu(const string &alias, const string &mtu)
SWSS_LOG_WARN("Setting mtu to alias:%s netdev failed with cmd:%s, rc:%d, error:%s", alias.c_str(), cmd_str.c_str(), ret, res.c_str());
return false;
}
else if (isLagMember(alias, lagMembers))
{
// Catch user improperly specified an MTU on the PortChannel
SWSS_LOG_WARN("Setting mtu to alias:%s which is a member of a PortChannel is invalid", alias.c_str());
return false;
}
else
{
throw runtime_error(cmd_str + " : " + res);
Expand Down Expand Up @@ -128,10 +168,17 @@ void PortMgr::doSendToIngressPortTask(Consumer &consumer)

}


void PortMgr::doTask(Consumer &consumer)
{
SWSS_LOG_ENTER();

/* Variable to lazily cache lag members upon first call into isLagMember(). We
* don't want to always query for lag members if not needed, and we also don't
* want to query it for each call to isLagMember() which may be on every port.
*/
std::unordered_map<std::string, int> lagMembers;

auto table = consumer.getTableName();
if (table == CFG_SEND_TO_INGRESS_PORT_TABLE_NAME)
{
Expand All @@ -156,6 +203,7 @@ void PortMgr::doTask(Consumer &consumer)
bool portOk = isPortStateOk(alias);

string admin_status, mtu;
bool isMtuSet = false;
std::vector<FieldValueTuple> field_values;

bool configured = (m_portList.find(alias) != m_portList.end());
Expand All @@ -167,7 +215,6 @@ void PortMgr::doTask(Consumer &consumer)
{
admin_status = DEFAULT_ADMIN_STATUS_STR;
mtu = DEFAULT_MTU_STR;

m_portList.insert(alias);
}
else if (!portOk)
Expand All @@ -181,6 +228,10 @@ void PortMgr::doTask(Consumer &consumer)
if (fvField(i) == "mtu")
{
mtu = fvValue(i);
/* mtu read might be "", so we can't just use .empty() to
* know if its set. There's test cases that depend on this
* logic ... */
isMtuSet = true;
}
else if (fvField(i) == "admin_status")
{
Expand All @@ -192,6 +243,11 @@ void PortMgr::doTask(Consumer &consumer)
}
}

/* Clear default MTU if LAG member as this will otherwise fail. */
if (!isMtuSet && isLagMember(alias, lagMembers)) {
mtu = "";
}

if (!portOk)
{
// Port configuration is handled by the orchagent. If the configuration is written to the APP DB using
Expand Down Expand Up @@ -221,14 +277,14 @@ void PortMgr::doTask(Consumer &consumer)

if (!mtu.empty())
{
setPortMtu(alias, mtu);
SWSS_LOG_NOTICE("Configure %s MTU to %s", alias.c_str(), mtu.c_str());
setPortMtu(alias, mtu, lagMembers);
}

if (!admin_status.empty())
{
setPortAdminStatus(alias, admin_status == "up");
SWSS_LOG_NOTICE("Configure %s admin status to %s", alias.c_str(), admin_status.c_str());
setPortAdminStatus(alias, admin_status == "up");
}
}
else if (op == DEL_COMMAND)
Expand Down
3 changes: 2 additions & 1 deletion cfgmgr/portmgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ class PortMgr : public Orch
void doSendToIngressPortTask(Consumer &consumer);
bool writeConfigToAppDb(const std::string &alias, const std::string &field, const std::string &value);
bool writeConfigToAppDb(const std::string &alias, std::vector<FieldValueTuple> &field_values);
bool setPortMtu(const std::string &alias, const std::string &mtu);
bool setPortMtu(const std::string &alias, const std::string &mtu, std::unordered_map<std::string, int> &lagMembers);
bool setPortAdminStatus(const std::string &alias, const bool up);
bool isPortStateOk(const std::string &alias);
bool isLagMember(const std::string &alias, std::unordered_map<std::string, int> &lagMembers);
};

}
45 changes: 45 additions & 0 deletions tests/test_portchannel.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,51 @@ def test_portchannel_member_netdev_oper_status(self, dvs, testlog):
# wait for port-channel deletion
time.sleep(1)

# Make sure if a PortChannel member port tries to set an MTU that it is
# ignored and does not cause a runtime error.
def test_portchannel_member_mtu(self, dvs, testlog):
config_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0)
app_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)

# create port-channel
tbl = swsscommon.Table(config_db, "PORTCHANNEL")
fvs = swsscommon.FieldValuePairs([("admin_status", "up"),("mtu", "9100"),("oper_status", "up")])
tbl.set("PortChannel111", fvs)

# set port-channel oper status
tbl = swsscommon.ProducerStateTable(app_db, "LAG_TABLE")
fvs = swsscommon.FieldValuePairs([("admin_status", "up"),("mtu", "9100"),("oper_status", "up")])
tbl.set("PortChannel111", fvs)

# add members to port-channel
tbl = swsscommon.Table(config_db, "PORTCHANNEL_MEMBER")
fvs = swsscommon.FieldValuePairs([("NULL", "NULL")])
tbl.set("PortChannel111|Ethernet0", fvs)
tbl.set("PortChannel111|Ethernet4", fvs)

# wait for port-channel netdev creation
time.sleep(1)

tbl = swsscommon.Table(config_db, "PORT")
fvs = swsscommon.FieldValuePairs([("mtu", "9100")])
tbl.set("Ethernet0", fvs)

# wait for attempted configuration to be applied
time.sleep(1)

# remove port-channel members
tbl = swsscommon.Table(config_db, "PORTCHANNEL_MEMBER")
tbl._del("PortChannel111|Ethernet0")
tbl._del("PortChannel111|Ethernet4")

# remove port-channel
tbl = swsscommon.Table(config_db, "PORTCHANNEL")
tbl._del("PortChannel111")

# wait for port-channel deletion
time.sleep(1)


# Add Dummy always-pass test at end as workaroud
# for issue when Flaky fail on final test it invokes module tear-down before retrying
def test_nonflaky_dummy():
Expand Down

0 comments on commit e226eb1

Please sign in to comment.