diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp index 2832f0bd12..6fa37f4950 100644 --- a/orchagent/flexcounterorch.cpp +++ b/orchagent/flexcounterorch.cpp @@ -118,6 +118,8 @@ void FlexCounterOrch::doTask(Consumer &consumer) { auto itDelay = std::find(std::begin(data), std::end(data), FieldValueTuple(FLEX_COUNTER_DELAY_STATUS_FIELD, "true")); string poll_interval; + string bulk_chunk_size; + string bulk_chunk_size_per_counter; if (itDelay != data.end()) { @@ -141,6 +143,14 @@ void FlexCounterOrch::doTask(Consumer &consumer) } } } + else if (field == BULK_CHUNK_SIZE_FIELD) + { + bulk_chunk_size = value; + } + else if (field == BULK_CHUNK_SIZE_PER_PREFIX_FIELD) + { + bulk_chunk_size_per_counter = value; + } else if(field == FLEX_COUNTER_STATUS_FIELD) { // Currently, the counters are disabled for polling by default @@ -256,6 +266,19 @@ void FlexCounterOrch::doTask(Consumer &consumer) SWSS_LOG_NOTICE("Unsupported field %s", field.c_str()); } } + + if (!bulk_chunk_size.empty() || !bulk_chunk_size_per_counter.empty()) + { + m_groupsWithBulkChunkSize.insert(key); + setFlexCounterGroupBulkChunkSize(flexCounterGroupMap[key], + bulk_chunk_size.empty() ? "NULL" : bulk_chunk_size, + bulk_chunk_size_per_counter.empty() ? "NULL" : bulk_chunk_size_per_counter); + } + else if (m_groupsWithBulkChunkSize.find(key) != m_groupsWithBulkChunkSize.end()) + { + setFlexCounterGroupBulkChunkSize(flexCounterGroupMap[key], "NULL", "NULL"); + m_groupsWithBulkChunkSize.erase(key); + } } consumer.m_toSync.erase(it++); diff --git a/orchagent/flexcounterorch.h b/orchagent/flexcounterorch.h index 4bc74dc3b8..f3aa03e6c0 100644 --- a/orchagent/flexcounterorch.h +++ b/orchagent/flexcounterorch.h @@ -67,6 +67,7 @@ class FlexCounterOrch: public Orch Table m_bufferQueueConfigTable; Table m_bufferPgConfigTable; Table m_deviceMetadataConfigTable; + std::unordered_set m_groupsWithBulkChunkSize; }; #endif diff --git a/orchagent/pfc_detect_mellanox.lua b/orchagent/pfc_detect_mellanox.lua old mode 100644 new mode 100755 index 5e6d8c00c5..e00243fa65 --- a/orchagent/pfc_detect_mellanox.lua +++ b/orchagent/pfc_detect_mellanox.lua @@ -18,13 +18,20 @@ local timestamp_struct = redis.call('TIME') local timestamp_current = timestamp_struct[1] + timestamp_struct[2] / 1000000 local timestamp_string = tostring(timestamp_current) redis.call('HSET', 'TIMESTAMP', 'pfcwd_poll_timestamp_last', timestamp_string) -local effective_poll_time = poll_time -local effective_poll_time_lasttime = redis.call('HGET', 'TIMESTAMP', 'effective_pfcwd_poll_time_last') +local global_effective_poll_time = poll_time +local global_effective_poll_time_lasttime = redis.call('HGET', 'TIMESTAMP', 'effective_pfcwd_poll_time_last') if timestamp_last ~= false then - effective_poll_time = (timestamp_current - tonumber(timestamp_last)) * 1000000 - redis.call('HSET', 'TIMESTAMP', 'effective_pfcwd_poll_time_last', effective_poll_time) + global_effective_poll_time = (timestamp_current - tonumber(timestamp_last)) * 1000000 + redis.call('HSET', 'TIMESTAMP', 'effective_pfcwd_poll_time_last', global_effective_poll_time) end +local effective_poll_time +local effective_poll_time_lasttime +local port_timestamp_last_cache = {} + +local debug_storm_global = redis.call('HGET', 'DEBUG_STORM', 'enabled') == 'true' +local debug_storm_threshold = tonumber(redis.call('HGET', 'DEBUG_STORM', 'threshold')) + -- Iterate through each queue local n = table.getn(KEYS) for i = n, 1, -1 do @@ -56,12 +63,37 @@ for i = n, 1, -1 do local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS' local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION_US' + -- Get port specific timestamp + local port_timestamp_current = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, 'PFC_WD_time_stamp')) + if port_timestamp_current ~= nil then + local port_timestamp_lasttime = port_timestamp_last_cache[port_id] + if port_timestamp_lasttime == nil then + port_timestamp_lasttime = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, 'PFC_WD_time_stamp_last')) + port_timestamp_last_cache[port_id] = port_timestamp_lasttime + redis.call('HSET', counters_table_name .. ':' .. port_id, 'PFC_WD_time_stamp_last', port_timestamp_current) + end + + if port_timestamp_lasttime ~= nil then + effective_poll_time = (port_timestamp_current - port_timestamp_lasttime) / 1000 + else + effective_poll_time = global_effective_poll_time + end + effective_poll_time_lasttime = false + else + effective_poll_time = global_effective_poll_time + effective_poll_time_lasttime = global_effective_poll_time_lasttime + end + -- Get all counters local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES') local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS') local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key) local pfc_duration = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key) + if debug_storm_global then + redis.call('PUBLISH', 'PFC_WD_DEBUG', 'Port ID ' .. port_id .. ' Queue index ' .. queue_index .. ' occupancy ' .. occupancy_bytes .. ' packets ' .. packets .. ' pfc rx ' .. pfc_rx_packets .. ' pfc duration ' .. pfc_duration .. ' effective poll time ' .. tostring(effective_poll_time) .. '(global ' .. tostring(global_effective_poll_time) .. ')') + end + if occupancy_bytes and packets and pfc_rx_packets and pfc_duration then occupancy_bytes = tonumber(occupancy_bytes) packets = tonumber(packets) @@ -82,6 +114,10 @@ for i = n, 1, -1 do pfc_duration_last = tonumber(pfc_duration_last) local storm_condition = (pfc_duration - pfc_duration_last) > (effective_poll_time * 0.99) + if debug_storm_threshold ~= nil and (pfc_duration - pfc_duration_last) > (effective_poll_time * debug_storm_threshold / 100) then + redis.call('PUBLISH', 'PFC_WD_DEBUG', 'Port ID ' .. port_id .. ' Queue index ' .. queue_index .. ' occupancy ' .. occupancy_bytes .. ' packets ' .. packets .. ' pfc rx ' .. pfc_rx_packets .. ' pfc duration ' .. pfc_duration .. ' effective poll time ' .. tostring(effective_poll_time) .. ', triggered by threshold ' .. debug_storm_threshold .. '%') + end + -- Check actual condition of queue being in PFC storm if (occupancy_bytes > 0 and packets - packets_last == 0 and storm_condition) or -- DEBUG CODE START. Uncomment to enable diff --git a/orchagent/saihelper.cpp b/orchagent/saihelper.cpp index e7cf7fb018..773db489d6 100644 --- a/orchagent/saihelper.cpp +++ b/orchagent/saihelper.cpp @@ -854,6 +854,8 @@ static inline void initSaiRedisCounterEmptyParameter(sai_redis_flex_counter_grou initSaiRedisCounterEmptyParameter(flex_counter_group_param.stats_mode); initSaiRedisCounterEmptyParameter(flex_counter_group_param.plugin_name); initSaiRedisCounterEmptyParameter(flex_counter_group_param.plugins); + initSaiRedisCounterEmptyParameter(flex_counter_group_param.bulk_chunk_size); + initSaiRedisCounterEmptyParameter(flex_counter_group_param.bulk_chunk_size_per_prefix); } static inline void initSaiRedisCounterParameterFromString(sai_s8_list_t &sai_s8_list, const std::string &str) @@ -938,6 +940,8 @@ void setFlexCounterGroupParameter(const string &group, attr.id = SAI_REDIS_SWITCH_ATTR_FLEX_COUNTER_GROUP; attr.value.ptr = &flex_counter_group_param; + initSaiRedisCounterEmptyParameter(flex_counter_group_param.bulk_chunk_size); + initSaiRedisCounterEmptyParameter(flex_counter_group_param.bulk_chunk_size_per_prefix); initSaiRedisCounterParameterFromString(flex_counter_group_param.counter_group_name, group); initSaiRedisCounterParameterFromString(flex_counter_group_param.poll_interval, poll_interval); initSaiRedisCounterParameterFromString(flex_counter_group_param.operation, operation); @@ -1017,6 +1021,25 @@ void setFlexCounterGroupStatsMode(const std::string &group, notifySyncdCounterOperation(is_gearbox, attr); } +void setFlexCounterGroupBulkChunkSize(const std::string &group, + const std::string &bulk_chunk_size, + const std::string &bulk_chunk_size_per_prefix, + bool is_gearbox) +{ + sai_attribute_t attr; + sai_redis_flex_counter_group_parameter_t flex_counter_group_param; + + attr.id = SAI_REDIS_SWITCH_ATTR_FLEX_COUNTER_GROUP; + attr.value.ptr = &flex_counter_group_param; + + initSaiRedisCounterEmptyParameter(flex_counter_group_param); + initSaiRedisCounterParameterFromString(flex_counter_group_param.counter_group_name, group); + initSaiRedisCounterParameterFromString(flex_counter_group_param.bulk_chunk_size, bulk_chunk_size); + initSaiRedisCounterParameterFromString(flex_counter_group_param.bulk_chunk_size_per_prefix, bulk_chunk_size_per_prefix); + + notifySyncdCounterOperation(is_gearbox, attr); +} + void delFlexCounterGroup(const std::string &group, bool is_gearbox) { diff --git a/orchagent/saihelper.h b/orchagent/saihelper.h index 7334adff35..0406427059 100644 --- a/orchagent/saihelper.h +++ b/orchagent/saihelper.h @@ -39,6 +39,11 @@ void setFlexCounterGroupStatsMode(const std::string &group, const std::string &stats_mode, bool is_gearbox=false); +void setFlexCounterGroupBulkChunkSize(const std::string &group, + const std::string &bulk_size, + const std::string &bulk_chunk_size_per_prefix, + bool is_gearbox=false); + void delFlexCounterGroup(const std::string &group, bool is_gearbox=false); diff --git a/tests/mock_tests/flexcounter_ut.cpp b/tests/mock_tests/flexcounter_ut.cpp index fa3b62e795..64b2e5b09c 100644 --- a/tests/mock_tests/flexcounter_ut.cpp +++ b/tests/mock_tests/flexcounter_ut.cpp @@ -111,6 +111,10 @@ namespace flexcounter_test } else { + if (flexCounterGroupParam->bulk_chunk_size.list != nullptr || flexCounterGroupParam->bulk_chunk_size_per_prefix.list != nullptr) + { + return SAI_STATUS_SUCCESS; + } mockFlexCounterGroupTable->del(key); }