Skip to content

Commit

Permalink
[feature] Add support for bytewise Load and Store and fix DMA model
Browse files (browse the repository at this point in the history)
  • Loading branch information
Victor-Jung committed Jun 25, 2024
1 parent 9644dd1 commit 76f052c
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 74 deletions.
48 changes: 28 additions & 20 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -756,8 +756,6 @@ impl<'a, 'b> Cpu<'a, 'b> {
+ self.engine.config.memory.tcdm.size)
}) =>
{
trace!("TCDM Binary Load");
trace!("Binary load address: 0x{:x}", x);
let id = (0..self.engine.num_clusters)
.position(|i| {
addr >= (self.engine.config.memory.tcdm.start
Expand All @@ -775,7 +773,9 @@ impl<'a, 'b> Cpu<'a, 'b> {
let word_offs = tcdm_addr - 4 * word_addr;
let ptr: *const u32 = self.tcdm_ptr[id];

Check warning on line 774 in src/engine.rs

View workflow job for this annotation

GitHub Actions / Format

Diff in /home/runner/work/banshee/banshee/src/engine.rs
let word = unsafe { *ptr.offset(word_addr as isize) };
(word >> (8 * word_offs)) & ((((1 as u64) << (8 << size)) - 1) as u32)
let val = (word >> (8 * word_offs)) & ((((1 as u64) << (8 << size)) - 1) as u32);
debug!("TCDM Load: addr: 0x{:x} value: 0x{:x}", x, (word >> (8 * word_offs)) & ((((1 as u64) << (8 << size)) - 1) as u32));
val
}
// Peripherals
x if (0..self.engine.num_clusters).any(|i| {
Expand Down Expand Up @@ -841,20 +841,26 @@ impl<'a, 'b> Cpu<'a, 'b> {
"Hart {} (pc=0x{:08x}) is reading outside the memory map at 0x{:08x}",

Check warning on line 841 in src/engine.rs

View workflow job for this annotation

GitHub Actions / Format

Diff in /home/runner/work/banshee/banshee/src/engine.rs
self.hartid, self.state.pc, addr
);
}
let word_offset = addr % 4;
let mask = (!(u64::MAX << (8 << size))) as u32;
if (size as u32) > (4 - word_offset) {
warn!(
"Hart {} (pc=0x{:08x}) is doing an unaligned load in DRAM at 0x{:08x}",
self.hartid, self.state.pc, addr

Check warning on line 850 in src/engine.rs

View workflow job for this annotation

GitHub Actions / Format

Diff in /home/runner/work/banshee/banshee/src/engine.rs
);
}
// trace!("Load 0x{:x} ({}B)", addr, 8 << size);
self.engine
.memory
.lock()
.unwrap()
.get(&(addr as u64))
.copied()
.unwrap_or(0)
let shift = (8 * (word_offset));

Check failure on line 853 in src/engine.rs

View workflow job for this annotation

GitHub Actions / Banshee (1.63.0)

unnecessary parentheses around assigned value
let word = ((self.engine.memory.lock().unwrap().get(&((addr - word_offset) as u64)).copied().unwrap_or(0)) >> shift) & mask;
debug!("DRAM Load: addr 0x{:x} value 0x{:x} shift {} mask 0x{:x} ({}B)", addr, word, shift, mask, 8 << size);
(word as u32)

Check failure on line 856 in src/engine.rs

View workflow job for this annotation

GitHub Actions / Banshee (1.63.0)

unnecessary parentheses around block return value
}
}
}

pub fn binary_store(&self, addr: u32, value: u32, mask: u32, size: u8) {
pub fn binary_store(&self, addr: u32, value: u32, size: u8) {
let word_offset = addr % 4;
let mask = ((((1 as u64) << (8 << size)) - 1) << (8 * word_offset)) as u32;
match addr {
x if x == self.engine.config.address.tcdm_start => (), // tcdm_start
x if x == self.engine.config.address.tcdm_end => (), // tcdm_end
Expand All @@ -873,6 +879,7 @@ impl<'a, 'b> Cpu<'a, 'b> {
x if x == self.engine.config.address.uart => {
let mut buffer = self.engine.putchar_buffer.lock().unwrap();
let buffer = buffer.entry(self.hartid).or_default();
debug!("UART Store: addr 0x{:x} value 0x{:x}", addr, value);
if value == '\n' as u32 {
eprintln!(
"{}{} hart-{:03} {} {}",
Expand Down Expand Up @@ -917,11 +924,10 @@ impl<'a, 'b> Cpu<'a, 'b> {
let word_offs = tcdm_addr - 4 * word_addr;
let ptr = self.tcdm_ptr[id] as *const u32;
let ptr_mut = ptr as *mut u32;
let wmask = ((((1 as u64) << (8 << size)) - 1) as u32) << (8 * word_offs);
unsafe {
let word_ptr = ptr_mut.offset(word_addr as isize);
let word = *word_ptr;
*word_ptr = (word & !wmask) | ((value << (8 * word_offs)) & wmask);
*word_ptr = (word & !mask) | ((value << (8 * word_offs)) & mask);
}
}
// Peripherals
Expand Down Expand Up @@ -1012,17 +1018,19 @@ impl<'a, 'b> Cpu<'a, 'b> {
self.hartid, self.state.pc, addr
);
}
trace!(
"Store 0x{:x} = 0x{:x} if 0x{:x} ({}B)",
debug!(
"DRAM Store: addr 0x{:x} value 0x{:x} mask 0x{:x} ({}B)",
addr,
value,
mask,

Check warning on line 1025 in src/engine.rs

View workflow job for this annotation

GitHub Actions / Format

Diff in /home/runner/work/banshee/banshee/src/engine.rs
8 << size
);
let offset_addr = addr - word_offset;
let mut data = self.engine.memory.lock().unwrap();
let data = data.entry(addr as u64).or_default();
let data = data.entry(offset_addr as u64).or_default();
let shifted_value = value << 8 * (addr % 4);
*data &= !mask;
*data |= value & mask;
*data |= shifted_value & mask;
}
}
}
Expand Down Expand Up @@ -1062,14 +1070,14 @@ impl<'a, 'b> Cpu<'a, 'b> {
// Aligned transfer
for _ in 0..n / 4 {
let tmp = self.binary_load(src, 2);
self.binary_store(dest, tmp, u32::MAX, 2);
self.binary_store(dest, tmp, 2);
src += 4;
dest += 4;
}
} else {
for _ in 0..n {
let tmp = self.binary_load(src, 0);
self.binary_store(dest, tmp, (u8::MAX as u32) << (8 * (dest % 4)), 0);
self.binary_store(dest, tmp, 0);
src += 1;
dest += 1;
}
Expand Down
2 changes: 1 addition & 1 deletion src/peripherals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ impl MemPoolITA {
data[[j as usize, ((n / splits) * split + i) as usize + offset]] as u8;
}
let word = u32::from_ne_bytes(elements);
cpu.binary_store(address + address_offset, word, u32::MAX, 2);
cpu.binary_store(address + address_offset, word, 2);
debug!(
"[ITA, CPU {}] Store OUT to 0x{:x}",
&cpu.hartid,
Expand Down
4 changes: 2 additions & 2 deletions src/runtime/jit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

; Forward declarations.
declare i32 @banshee_load(%Cpu* %cpu, i32 %addr, i8 %size)
declare void @banshee_store(%Cpu* %cpu, i32 %addr, i32 %value, i32 %mask, i8 %size)
declare void @banshee_store(%Cpu* %cpu, i32 %addr, i32 %value, i8 %size)
declare i32 @banshee_rmw(%Cpu* %cpu, i32 %addr, i32 %value, i8 %op)
declare i32 @banshee_csr_read(%Cpu* %cpu, i16 %csr, i32 %notrace)
declare void @banshee_csr_write(%Cpu* %cpu, i16 %csr, i32 %value, i32 %notrace)
Expand Down Expand Up @@ -58,7 +58,7 @@ declare float @banshee_fp16_to_fp32_op(i16 %rs1, i16 %rs2, float %rs3, i8 %op, i
declare i16 @banshee_fp8_to_fp16_op(i8 %rs1, i8 %rs2, i16 %rs3, i8 %op, i1 %fpmode_src, i1 %fpmode_dst)
declare float @banshee_fp8_to_fp32_op(i8 %rs1, i8 %rs2, float %rs3, i8 %op, i1 %fpmode_src)

declare void @banshee_ssr_write_cfg(%SsrState* %ssr, %Cpu* %cpu, i32 %addr, i32 %value, i32 %mask)
declare void @banshee_ssr_write_cfg(%SsrState* %ssr, %Cpu* %cpu, i32 %addr, i32 %value)
declare i32 @banshee_ssr_read_cfg(%SsrState* readonly %ssr, i32 %addr)
declare i32 @banshee_ssr_next(%SsrState* %ssr, %Cpu* %cpu)
declare void @banshee_ssr_eoi(%SsrState* %ssr)
Expand Down
27 changes: 8 additions & 19 deletions src/runtime/jit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,11 @@ pub unsafe fn banshee_ssr_write_cfg(
ssr: &mut SsrState,
cpu: &mut Cpu,
addr: u32,
value: u32,
mask: u32,
value: u32
) {
extern "C" {
fn banshee_load(cpu: &mut Cpu, addr: u32, size: u8) -> u32;
}
// TODO: Handle the mask!
let addr = addr as usize / 8;
let mut set_ptr = 0;
match addr {
Expand Down Expand Up @@ -269,33 +267,24 @@ pub unsafe fn banshee_dma_rep(dma: &mut DmaState, reps: u32) {
pub unsafe fn banshee_dma_strt(dma: &mut DmaState, cpu: &mut Cpu, size: u32, flags: u32) -> u32 {
extern "C" {
fn banshee_load(cpu: &mut Cpu, addr: u32, size: u8) -> u32;
fn banshee_store(cpu: &mut Cpu, addr: u32, value: u32, mask: u32, size: u8);
fn banshee_store(cpu: &mut Cpu, addr: u32, value: u32, size: u8);
}

let id = dma.done_id;
dma.done_id += 1;
dma.size = size;

// assert_eq!(
// size % 4,
// 0,
// "DMA transfer size must be a multiple of 4B for now"
// );
let num_beats = size / 4;
let enable_2d = (flags & (1 << 1)) != 0;
let steps = if enable_2d { dma.reps } else { 1 };

for i in 0..steps as u64 {
let src = dma.src + i * dma.src_stride as u64;
let dst = dma.dst + i * dma.dst_stride as u64;
// assert_eq!(src % 4, 0, "DMA src transfer block must be 4-byte-aligned");
// assert_eq!(dst % 4, 0, "DMA dst transfer block must be 4-byte-aligned");
for j in 0..num_beats as u64 {
let tmp = banshee_load(cpu, (src + j * 4) as u32, 2);
banshee_store(cpu, (dst + j * 4) as u32, tmp, u32::max_value(), 2);
let mut src = dma.src + i * dma.src_stride as u64;
let mut dst = dma.dst + i * dma.dst_stride as u64;
for j in 0..size as u64 {
let tmp = banshee_load(cpu, (src + j) as u32, 0);
banshee_store(cpu, (dst + j) as u32, tmp, 0);
}
}

id
}

Expand Down
36 changes: 4 additions & 32 deletions src/tran.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ impl<'a> InstructionTranslator<'a> {
[
self.section.state_ptr,
addr,
LLVMConstInt(LLVMInt8Type(), 4 as u64, 0),
LLVMConstInt(LLVMInt8Type(), 2 as u64, 0),
]
.as_mut_ptr(),
3,
Expand Down Expand Up @@ -6459,14 +6459,8 @@ impl<'a> InstructionTranslator<'a> {
),
[
self.section.state_ptr,
// LLVMBuildBitCast(
// self.builder,
// self.section.state_ptr,
// LLVMPointerType(LLVMInt8Type(), 0),
// NONAME,
// ),
aligned_addr,
LLVMConstInt(LLVMInt8Type(), size as u64, 0),
LLVMConstInt(LLVMInt8Type(), 2 as u64, 0),
]
.as_mut_ptr(),
3,
Expand Down Expand Up @@ -6563,27 +6557,6 @@ impl<'a> InstructionTranslator<'a> {
NONAME,
);

// Compute the misalignment.
let shift = LLVMBuildAnd(
self.builder,
addr,
LLVMConstInt(LLVMInt32Type(), 3, 0),
NONAME,
);
let shift = LLVMBuildMul(
self.builder,
shift,
LLVMConstInt(LLVMInt32Type(), 8, 0),
NONAME,
);

// Align the data to the address and generate a bit mask.
let mask = LLVMConstNull(ty);
let mask = LLVMBuildNot(self.builder, mask, NONAME);
let mask = LLVMBuildZExt(self.builder, mask, LLVMInt32Type(), NONAME);
let mask = LLVMBuildShl(self.builder, mask, shift, NONAME);
let value = LLVMBuildShl(self.builder, value, shift, NONAME);

// Check if the address is in the SSR configuration space.
let (is_ssr, ssr_ptr, ssr_addr) = self.emit_ssr_check(aligned_addr);
let bb_ssr = LLVMCreateBasicBlockInContext(self.section.engine.context, NONAME);
Expand All @@ -6596,7 +6569,7 @@ impl<'a> InstructionTranslator<'a> {
LLVMPositionBuilderAtEnd(self.builder, bb_ssr);
self.section.emit_call(
"banshee_ssr_write_cfg",
[ssr_ptr, self.section.state_ptr, ssr_addr, value, mask],
[ssr_ptr, self.section.state_ptr, ssr_addr, value],
);
LLVMBuildBr(self.builder, bb_end);
LLVMPositionBuilderAtEnd(self.builder, bb_nossr);
Expand All @@ -6606,9 +6579,8 @@ impl<'a> InstructionTranslator<'a> {
"banshee_store",
[
self.section.state_ptr,
aligned_addr,
addr,
value,
mask,
LLVMConstInt(LLVMInt8Type(), size as u64, 0),
],
);
Expand Down

0 comments on commit 76f052c

Please sign in to comment.