Skip to content

Commit

Permalink
tenstorrent#14580: Remove usage of noc_parameters.h from cq_commands.…
Browse files Browse the repository at this point in the history
  • Loading branch information
blozano-tt authored and Christopher Taylor committed Nov 12, 2024
1 parent d109fbd commit 319f902
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
#include "tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_test.hpp"


#include "llrt/hal.hpp"

#define CQ_PREFETCH_CMD_BARE_MIN_SIZE tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::HOST)

constexpr uint32_t DEFAULT_TEST_TYPE = 0;
constexpr uint32_t DEVICE_DATA_SIZE = 768 * 1024;
constexpr uint32_t MAX_PAGE_SIZE = 256 * 1024; // bigger than scratch_db_page_size
Expand Down
3 changes: 2 additions & 1 deletion tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3319,7 +3319,8 @@ void Device::end_trace(const uint8_t cq_id, const uint32_t tid) {
auto &trace_data = this->trace_buffer_pool_[tid]->desc->data;
trace_data = std::move(this->sysmem_manager().get_bypass_data());
// Add command to terminate the trace buffer
DeviceCommand command_sequence(CQ_PREFETCH_CMD_BARE_MIN_SIZE);
uint32_t cq_prefetch_cmd_bare_min_size = hal.get_alignment(HalMemType::HOST);
DeviceCommand command_sequence(cq_prefetch_cmd_bare_min_size);
command_sequence.add_prefetch_exec_buf_end();
for (int i = 0; i < command_sequence.size_bytes() / sizeof(uint32_t); i++) {
trace_data.push_back(((uint32_t*)command_sequence.data())[i]);
Expand Down
10 changes: 7 additions & 3 deletions tt_metal/impl/dispatch/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
#include "tt_metal/impl/kernels/kernel.hpp"
#include "tt_metal/third_party/umd/device/tt_xy_pair.h"

#include "llrt/hal.hpp"

#define CQ_PREFETCH_CMD_BARE_MIN_SIZE tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::HOST)

using namespace tt::tt_metal;

using std::map;
Expand Down Expand Up @@ -338,7 +342,7 @@ EnqueueProgramCommand::EnqueueProgramCommand(
}

void EnqueueProgramCommand::assemble_preamble_commands(ProgramCommandSequence& program_command_sequence, std::vector<ConfigBufferEntry>& kernel_config_addrs) {
constexpr uint32_t uncached_cmd_sequence_sizeB =
uint32_t uncached_cmd_sequence_sizeB =
CQ_PREFETCH_CMD_BARE_MIN_SIZE; // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_SET_WRITE_OFFSET

program_command_sequence.preamble_command_sequence =
Expand All @@ -361,7 +365,7 @@ void EnqueueProgramCommand::assemble_stall_commands(ProgramCommandSequence& prog
// Wait command so previous program finishes
// Wait command with barrier for binaries to commit to DRAM
// Prefetch stall to prevent prefetcher picking up incomplete binaries from DRAM
constexpr uint32_t uncached_cmd_sequence_sizeB =
uint32_t uncached_cmd_sequence_sizeB =
CQ_PREFETCH_CMD_BARE_MIN_SIZE + // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
CQ_PREFETCH_CMD_BARE_MIN_SIZE; // CQ_PREFETCH_CMD_STALL

Expand All @@ -377,7 +381,7 @@ void EnqueueProgramCommand::assemble_stall_commands(ProgramCommandSequence& prog
true, this->dispatch_message_addr, this->expected_num_workers_completed);
} else {
// Wait command so previous program finishes
constexpr uint32_t cached_cmd_sequence_sizeB =
uint32_t cached_cmd_sequence_sizeB =
CQ_PREFETCH_CMD_BARE_MIN_SIZE; // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT

program_command_sequence.stall_command_sequence =
Expand Down
14 changes: 1 addition & 13 deletions tt_metal/impl/dispatch/cq_commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@

#pragma once

#include "noc/noc_parameters.h"
#include <cstdint>

constexpr uint32_t CQ_PREFETCH_CMD_BARE_MIN_SIZE = PCIE_ALIGNMENT; // for NOC PCIe alignemnt
constexpr uint32_t CQ_DISPATCH_CMD_SIZE = 16; // for L1 alignment

// Prefetcher CMD ID enums
Expand Down Expand Up @@ -293,18 +292,7 @@ struct CQDispatchCmd {

//////////////////////////////////////////////////////////////////////////////

struct CQPrefetchHToPrefetchDHeader_s {
uint32_t length;
};

typedef union {
struct CQPrefetchHToPrefetchDHeader_s header;
unsigned char padding[CQ_PREFETCH_CMD_BARE_MIN_SIZE];
} CQPrefetchHToPrefetchDHeader;


static_assert(sizeof(CQPrefetchBaseCmd) == sizeof(uint8_t)); // if this fails, padding above needs to be adjusted
static_assert(sizeof(CQDispatchBaseCmd) == sizeof(uint8_t)); // if this fails, padding above needs to be adjusted
static_assert((sizeof(CQPrefetchCmd) & (CQ_DISPATCH_CMD_SIZE - 1)) == 0);
static_assert((sizeof(CQDispatchCmd) & (CQ_DISPATCH_CMD_SIZE - 1)) == 0);
static_assert((sizeof(CQPrefetchHToPrefetchDHeader) & (CQ_PREFETCH_CMD_BARE_MIN_SIZE - 1)) == 0);
2 changes: 1 addition & 1 deletion tt_metal/impl/dispatch/device_command.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class DeviceCommand {
relay_wait->base.cmd_id = CQ_PREFETCH_CMD_RELAY_INLINE;
relay_wait->relay_inline.dispatcher_type = dispatcher_type;
relay_wait->relay_inline.length = sizeof(CQDispatchCmd);
relay_wait->relay_inline.stride = CQ_PREFETCH_CMD_BARE_MIN_SIZE;
relay_wait->relay_inline.stride = this->pcie_alignment;

wait_cmd->base.cmd_id = CQ_DISPATCH_CMD_WAIT;
wait_cmd->wait.barrier = barrier;
Expand Down
11 changes: 11 additions & 0 deletions tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
#include "tt_metal/impl/dispatch/kernels/cq_common.hpp"
#include "debug/dprint.h"

#include "noc/noc_parameters.h" // PCIE_ALIGNMENT
constexpr uint32_t CQ_PREFETCH_CMD_BARE_MIN_SIZE = PCIE_ALIGNMENT; // for NOC PCIe alignemnt
struct CQPrefetchHToPrefetchDHeader_s {
uint32_t length;
};
typedef union {
struct CQPrefetchHToPrefetchDHeader_s header;
unsigned char padding[CQ_PREFETCH_CMD_BARE_MIN_SIZE];
} CQPrefetchHToPrefetchDHeader;
static_assert((sizeof(CQPrefetchHToPrefetchDHeader) & (CQ_PREFETCH_CMD_BARE_MIN_SIZE - 1)) == 0);

typedef uint16_t prefetch_q_entry_type;

constexpr uint32_t downstream_cb_base = get_compile_time_arg_val(0);
Expand Down

0 comments on commit 319f902

Please sign in to comment.