diff --git a/umi/sumi/rtl/umi_arbiter.v b/umi/sumi/rtl/umi_arbiter.v index 424ae10..fd5c1cf 100644 --- a/umi/sumi/rtl/umi_arbiter.v +++ b/umi/sumi/rtl/umi_arbiter.v @@ -38,6 +38,21 @@ module umi_arbiter wire [N-1:0] spec_requests; genvar i; + // NOTE: The thermometer mask works correctly in case of a collision + // that is followed by a single request from a masked source. + // Consider 4 requestors, of which only 0 and 1 are requesting: + // cycle 0: req[0] = 1, req[1] = 1, grants[0] = 1, grants[1] = 0, collision = 1, therm = 4'b0000 + // cycle 1: req[0] = 0, req[1] = 1, grants[0] = 0, grants[1] = 1, collision = 0, therm = 4'b0001 + // cycle 2: req[0] = 1, req[1] = 0, grants[0] = 0, grants[1] = 0, collision = 1, therm = 4'b0001 + // cycle 3: req[0] = 1, req[1] = 0, grants[0] = 0, grants[1] = 0, collision = 1, therm = 4'b0011 + // cycle 4: req[0] = 1, req[1] = 0, grants[0] = 0, grants[1] = 0, collision = 1, therm = 4'b0111 + // cycle 5: req[0] = 1, req[1] = 0, grants[0] = 1, grants[1] = 0, collision = 0, therm = 4'b0000 + // Here, after cycle 0, requestor 0 was masked due to a collision with + // requestor 1. When requestor 0 sends its second request with no other + // requestors trying, it incurs a 3-cycle penalty for the thermometer to + // fill up. While the 3-cycle penalty is detrimental to performance, the + // system does not hang. 
+ // Thermometer mask that gets hotter with every collision // wraps to zero when all ones generate if (N > 1) diff --git a/umi/sumi/testbench/testbench_mux.sv b/umi/sumi/testbench/testbench_mux.sv index ccdbbd1..510fe92 100644 --- a/umi/sumi/testbench/testbench_mux.sv +++ b/umi/sumi/testbench/testbench_mux.sv @@ -147,7 +147,7 @@ module testbench ( ) umi_mux_i ( .clk (clk), .nreset (nreset), - .arbmode (2'b00), + .arbmode (2'b10), .arbmask ({N{1'b0}}), .umi_in_valid (umi_in_valid), diff --git a/umi/sumi/tests/conftest.py b/umi/sumi/tests/conftest.py index 5fb5534..c8801ff 100644 --- a/umi/sumi/tests/conftest.py +++ b/umi/sumi/tests/conftest.py @@ -1,10 +1,11 @@ import pytest -from switchboard import SbDut +from switchboard import SbDut, UmiTxRx, random_umi_packet import os from pathlib import Path from umi import sumi from fasteners import InterProcessLock import multiprocessing +import numpy as np def pytest_collection_modifyitems(items): @@ -84,3 +85,76 @@ def random_seed(request): print(f'Random seed used: {test_seed}') yield test_seed print(f'Random seed used: {test_seed}') + + +@pytest.fixture +def umi_send(random_seed): + """Return a per-port sender of random UMI packets (seeded from random_seed).""" + + def setup(host_num, num_packets_to_send, num_out_ports): + np.random.seed(random_seed + host_num) # offset per host so senders do not emit identical streams + + umi = UmiTxRx(f'client2rtl_{host_num}.q', '') + tee = UmiTxRx(f'tee_{host_num}.q', '') + + for count in range(num_packets_to_send): + dstport = np.random.randint(num_out_ports) + dstaddr = (2**8)*np.random.randint(2**32) + dstport*(2**40) + srcaddr = (2**8)*np.random.randint(2**32) + host_num*(2**40) + txp = random_umi_packet(dstaddr=dstaddr, srcaddr=srcaddr) + print(f"port {host_num} sending #{count} cmd: 0x{txp.cmd:08x} " + f"srcaddr: 0x{srcaddr:08x} dstaddr: 0x{dstaddr:08x} to port {dstport}") + # send the packet to both simulation and local queues + umi.send(txp) + tee.send(txp) + + return setup + + +@pytest.fixture +def apply_atomic(): + + def setup(origdata, atomicdata, operation, maxrange): + tempval = origdata + if (operation == 0): + tempval 
= origdata + atomicdata + if (tempval >= maxrange): + tempval = tempval - maxrange + elif (operation == 1): + tempval = origdata & atomicdata + elif (operation == 2): + tempval = origdata | atomicdata + elif (operation == 3): + tempval = origdata ^ atomicdata + elif (operation == 4): + if (origdata & (maxrange >> 1)): + origdata = int(origdata) - int(maxrange) + else: + origdata = int(origdata) + if (atomicdata & (maxrange >> 1)): + atomicdata = int(atomicdata) - int(maxrange) + else: + atomicdata = int(atomicdata) + tempval = origdata if (origdata > atomicdata) else atomicdata + elif (operation == 5): + if (origdata & (maxrange >> 1)): + origdata = int(origdata) - int(maxrange) + else: + origdata = int(origdata) + if (atomicdata & (maxrange >> 1)): + atomicdata = int(atomicdata) - int(maxrange) + else: + atomicdata = int(atomicdata) + tempval = atomicdata if (origdata > atomicdata) else origdata + elif (operation == 6): + tempval = origdata if (origdata > atomicdata) else atomicdata + elif (operation == 7): + tempval = atomicdata if (origdata > atomicdata) else origdata + elif (operation == 8): + tempval = atomicdata + else: + tempval = atomicdata + + return tempval + + return setup diff --git a/umi/sumi/tests/test_crossbar.py b/umi/sumi/tests/test_crossbar.py index d8b5a1b..5fc789a 100755 --- a/umi/sumi/tests/test_crossbar.py +++ b/umi/sumi/tests/test_crossbar.py @@ -5,31 +5,11 @@ import pytest import multiprocessing -import random -from switchboard import UmiTxRx, random_umi_packet, delete_queue - - -def umi_send(x, n, ports, seed): - - random.seed(seed) - - umi = UmiTxRx(f'client2rtl_{x}.q', '') - tee = UmiTxRx(f'tee_{x}.q', '') - - for count in range(n): - dstport = random.randint(0, ports-1) - dstaddr = (2**8)*random.randint(0, (2**32)-1) + dstport*(2**40) - srcaddr = (2**8)*random.randint(0, (2**32)-1) + x*(2**40) - txp = random_umi_packet(dstaddr=dstaddr, srcaddr=srcaddr) - print(f"port {x} sending #{count} cmd: 0x{txp.cmd:08x} srcaddr: 0x{srcaddr:08x} " - 
f"dstaddr: 0x{dstaddr:08x} to port {dstport}") - # send the packet to both simulation and local queues - umi.send(txp) - tee.send(txp) +from switchboard import UmiTxRx, delete_queue @pytest.mark.skip(reason="Crossbar asserts output valid even when in reset") -def test_crossbar(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): +def test_crossbar(sumi_dut, umi_send, sb_umi_valid_mode, sb_umi_ready_mode): n = 100 ports = 4 for x in range(ports): @@ -56,7 +36,8 @@ def test_crossbar(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): procs = [] for x in range(ports): - procs.append(multiprocessing.Process(target=umi_send, args=(x, n, ports, (random_seed+x),))) + procs.append(multiprocessing.Process(target=umi_send, + args=(x, n, ports,))) for proc in procs: proc.start() diff --git a/umi/sumi/tests/test_demux.py b/umi/sumi/tests/test_demux.py index 854fe3b..4be70c3 100755 --- a/umi/sumi/tests/test_demux.py +++ b/umi/sumi/tests/test_demux.py @@ -6,10 +6,9 @@ import pytest import multiprocessing from switchboard import UmiTxRx, delete_queue -from umi_common import umi_send -def test_demux(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): +def test_demux(sumi_dut, umi_send, sb_umi_valid_mode, sb_umi_ready_mode): n = 4000 # Number of transactions to be sent to each demux input port in_ports = 1 # Number of input ports. Fixed to 1 for demux out_ports = 4 # Number of output ports. 
Must match testbench @@ -30,7 +29,7 @@ def test_demux(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): print("### Starting test ###") send_proc = multiprocessing.Process(target=umi_send, - args=(0, n, out_ports, random_seed,)) + args=(0, n, out_ports,)) send_proc.start() diff --git a/umi/sumi/tests/test_mem_agent.py b/umi/sumi/tests/test_mem_agent.py index 69f1430..0f632c7 100755 --- a/umi/sumi/tests/test_mem_agent.py +++ b/umi/sumi/tests/test_mem_agent.py @@ -8,51 +8,7 @@ from switchboard import UmiTxRx -def apply_atomic(origdata, atomicdata, operation, maxrange): - tempval = origdata - if (operation == 0): - tempval = origdata + atomicdata - if (tempval >= maxrange): - tempval = tempval - maxrange - elif (operation == 1): - tempval = origdata & atomicdata - elif (operation == 2): - tempval = origdata | atomicdata - elif (operation == 3): - tempval = origdata ^ atomicdata - elif (operation == 4): - if (origdata & (maxrange >> 1)): - origdata = int(origdata) - int(maxrange) - else: - origdata = int(origdata) - if (atomicdata & (maxrange >> 1)): - atomicdata = int(atomicdata) - int(maxrange) - else: - atomicdata = int(atomicdata) - tempval = origdata if (origdata > atomicdata) else atomicdata - elif (operation == 5): - if (origdata & (maxrange >> 1)): - origdata = int(origdata) - int(maxrange) - else: - origdata = int(origdata) - if (atomicdata & (maxrange >> 1)): - atomicdata = int(atomicdata) - int(maxrange) - else: - atomicdata = int(atomicdata) - tempval = atomicdata if (origdata > atomicdata) else origdata - elif (operation == 6): - tempval = origdata if (origdata > atomicdata) else atomicdata - elif (operation == 7): - tempval = atomicdata if (origdata > atomicdata) else origdata - elif (operation == 8): - tempval = atomicdata - else: - tempval = atomicdata - - return tempval - - -def test_mem_agent(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): +def test_mem_agent(sumi_dut, apply_atomic, random_seed, sb_umi_valid_mode, 
sb_umi_ready_mode): np.random.seed(random_seed) diff --git a/umi/sumi/tests/test_mux.py b/umi/sumi/tests/test_mux.py index 6e51f6d..293d8c2 100644 --- a/umi/sumi/tests/test_mux.py +++ b/umi/sumi/tests/test_mux.py @@ -5,11 +5,10 @@ import pytest import multiprocessing -from switchboard import UmiTxRx, delete_queue -from umi_common import umi_send +from switchboard import UmiTxRx, random_umi_packet, delete_queue -def test_mux(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): +def test_mux(sumi_dut, umi_send, sb_umi_valid_mode, sb_umi_ready_mode): n = 1000 # Number of transactions to be sent to each mux input port in_ports = 4 # Number of input ports. Must match testbench out_ports = 1 # Number of output ports. Fixed to 1 for mux @@ -33,7 +32,7 @@ def test_mux(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): for x in range(in_ports): send_procs.append(multiprocessing.Process(target=umi_send, - args=(x, n, out_ports, (random_seed+x),))) + args=(x, n, out_ports,))) for proc in send_procs: proc.start() @@ -79,5 +78,32 @@ def test_mux(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): print(f"Received {len(recv_queue[i])} packets at port {i}") +@pytest.mark.skip(reason="Must only be run when evaluating performance using waveforms") +def test_round_robin_arb(sumi_dut): + ''' + This test is used to get an initial idea of the performance impact of + the arbitration scheme present in the mux. With the thermometer based + round robin scheme this test shows a performance penalty of up to 4 + cycles for a thermometer masked transaction. This test must be run with + the waveform enabled. 
+ ''' + + # Instantiate the client2rtl queues for ports 0 and 1 (send-only; no RX queue is given) + inq = [UmiTxRx(f'client2rtl_{x}.q', '', fresh=True) for x in range(2)] + + # launch the simulation + sumi_dut.simulate( + plusargs=[('valid_mode', 1), + ('ready_mode', 1)]) + + txp = random_umi_packet() + print(f"Sending cmd: 0x{txp.cmd:08x} " + f"srcaddr: 0x{txp.srcaddr:08x} dstaddr: 0x{txp.dstaddr:08x}") + # send on ports 0 and 1 (intended to collide), then again on port 0 alone + inq[0].send(txp) + inq[1].send(txp) + inq[0].send(txp) + + if __name__ == '__main__': pytest.main(['-s', '-q', __file__]) diff --git a/umi/sumi/tests/test_regif.py b/umi/sumi/tests/test_regif.py index ad211a0..a523e6d 100755 --- a/umi/sumi/tests/test_regif.py +++ b/umi/sumi/tests/test_regif.py @@ -8,51 +8,7 @@ from switchboard import UmiTxRx -def apply_atomic(origdata, atomicdata, operation, maxrange): - tempval = origdata - if (operation == 0): - tempval = origdata + atomicdata - if (tempval >= maxrange): - tempval = tempval - maxrange - elif (operation == 1): - tempval = origdata & atomicdata - elif (operation == 2): - tempval = origdata | atomicdata - elif (operation == 3): - tempval = origdata ^ atomicdata - elif (operation == 4): - if (origdata & (maxrange >> 1)): - origdata = int(origdata) - int(maxrange) - else: - origdata = int(origdata) - if (atomicdata & (maxrange >> 1)): - atomicdata = int(atomicdata) - int(maxrange) - else: - atomicdata = int(atomicdata) - tempval = origdata if (origdata > atomicdata) else atomicdata - elif (operation == 5): - if (origdata & (maxrange >> 1)): - origdata = int(origdata) - int(maxrange) - else: - origdata = int(origdata) - if (atomicdata & (maxrange >> 1)): - atomicdata = int(atomicdata) - int(maxrange) - else: - atomicdata = int(atomicdata) - tempval = atomicdata if (origdata > atomicdata) else origdata - elif (operation == 6): - tempval = origdata if (origdata > atomicdata) else atomicdata - elif (operation == 7): - tempval = atomicdata if (origdata > atomicdata) else origdata - elif (operation == 8): - tempval = 
atomicdata - else: - tempval = atomicdata - - return tempval - - -def test_regif(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): +def test_regif(sumi_dut, apply_atomic, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): n = 100 # Number of reads, atomic txns and writes each from the register file diff --git a/umi/sumi/tests/test_switch.py b/umi/sumi/tests/test_switch.py index f7fa5ad..d8cf2ac 100755 --- a/umi/sumi/tests/test_switch.py +++ b/umi/sumi/tests/test_switch.py @@ -6,10 +6,9 @@ import pytest import multiprocessing from switchboard import UmiTxRx, delete_queue -from umi_common import umi_send -def test_switch(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): +def test_switch(sumi_dut, umi_send, sb_umi_valid_mode, sb_umi_ready_mode): n = 1000 # Number of transactions to be sent to each switch input port in_ports = 4 # Number of input ports. Must match testbench out_ports = 2 # Number of output ports. Must match testbench @@ -32,7 +31,7 @@ def test_switch(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): procs = [] for x in range(in_ports): procs.append(multiprocessing.Process(target=umi_send, - args=(x, n, out_ports, (random_seed+x),))) + args=(x, n, out_ports,))) for proc in procs: proc.start() diff --git a/umi/sumi/tests/test_umi_ram.py b/umi/sumi/tests/test_umi_ram.py index b69a88c..c1ce609 100755 --- a/umi/sumi/tests/test_umi_ram.py +++ b/umi/sumi/tests/test_umi_ram.py @@ -8,51 +8,7 @@ from switchboard import UmiTxRx -def apply_atomic(origdata, atomicdata, operation, maxrange): - tempval = origdata - if (operation == 0): - tempval = origdata + atomicdata - if (tempval >= maxrange): - tempval = tempval - maxrange - elif (operation == 1): - tempval = origdata & atomicdata - elif (operation == 2): - tempval = origdata | atomicdata - elif (operation == 3): - tempval = origdata ^ atomicdata - elif (operation == 4): - if (origdata & (maxrange >> 1)): - origdata = int(origdata) - int(maxrange) - else: - origdata = 
int(origdata) - if (atomicdata & (maxrange >> 1)): - atomicdata = int(atomicdata) - int(maxrange) - else: - atomicdata = int(atomicdata) - tempval = origdata if (origdata > atomicdata) else atomicdata - elif (operation == 5): - if (origdata & (maxrange >> 1)): - origdata = int(origdata) - int(maxrange) - else: - origdata = int(origdata) - if (atomicdata & (maxrange >> 1)): - atomicdata = int(atomicdata) - int(maxrange) - else: - atomicdata = int(atomicdata) - tempval = atomicdata if (origdata > atomicdata) else origdata - elif (operation == 6): - tempval = origdata if (origdata > atomicdata) else atomicdata - elif (operation == 7): - tempval = atomicdata if (origdata > atomicdata) else origdata - elif (operation == 8): - tempval = atomicdata - else: - tempval = atomicdata - - return tempval - - -def test_umi_ram(sumi_dut, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): +def test_umi_ram(sumi_dut, apply_atomic, random_seed, sb_umi_valid_mode, sb_umi_ready_mode): ports = 5 # Number of input ports of umi_ram. 
Must match testbench n = 100 # Number of reads, atomic txns and writes each from the umi_ram diff --git a/umi/sumi/tests/umi_common.py b/umi/sumi/tests/umi_common.py deleted file mode 100644 index aec2dc9..0000000 --- a/umi/sumi/tests/umi_common.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2024 Zero ASIC -# This code is licensed under Apache License 2.0 (see LICENSE for details) - -import numpy as np -from switchboard import UmiTxRx, random_umi_packet - - -def umi_send(host_num, num_packets_to_send, num_out_ports, seed): - - np.random.seed(seed) - - umi = UmiTxRx(f'client2rtl_{host_num}.q', '') - tee = UmiTxRx(f'tee_{host_num}.q', '') - - for count in range(num_packets_to_send): - dstport = np.random.randint(num_out_ports) - dstaddr = (2**8)*np.random.randint(2**32) + dstport*(2**40) - srcaddr = (2**8)*np.random.randint(2**32) + host_num*(2**40) - txp = random_umi_packet(dstaddr=dstaddr, srcaddr=srcaddr) - print(f"port {host_num} sending #{count} cmd: 0x{txp.cmd:08x} srcaddr: 0x{srcaddr:08x} " - f"dstaddr: 0x{dstaddr:08x} to port {dstport}") - # send the packet to both simulation and local queues - umi.send(txp) - tee.send(txp)