From f43da416fdc55a515bf5c0780f91c78c72dc1101 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 8 Sep 2024 14:45:09 +0200 Subject: [PATCH] n64: refactor devirtualize and memory ops, force JIT only with instruction cache (#1635) This change contains a large refactoring with no functional changes, plus a small functional change that will help speed up the JIT. The refactoring allows CPU::devirtualize to return all the information regarding the memory access. This in turn allows all memory ops (which contained lots of duplicated code) to be simplified by using devirtualize, reducing code duplication. The functional change is that we now use the JIT only when running code from icache (which is 99.9999% of the time). Running code without icache is extremely slow on real hardware and only happens in specific situations (e.g. during boot when RDRAM is not initialized). By limiting the JIT to run from icache, we open the door to implementing proper icache support in the JIT and to finish removing the instruction epilogue with its slow "instruction cache stepper" (which in addition to being slow, is also inaccurate and makes us fail cache test ROMs). 
--- ares/n64/cpu/cpu.cpp | 14 ++- ares/n64/cpu/cpu.hpp | 88 ++++++++------- ares/n64/cpu/dcache.cpp | 70 ++++++------ ares/n64/cpu/interpreter-ipu.cpp | 59 +++++----- ares/n64/cpu/memory.cpp | 181 ++++++++++--------------------- ares/n64/cpu/tlb.cpp | 50 +++++---- ares/n64/system/system.cpp | 8 +- nall/gdb/server.cpp | 2 +- 8 files changed, 209 insertions(+), 263 deletions(-) diff --git a/ares/n64/cpu/cpu.cpp b/ares/n64/cpu/cpu.cpp index 884846d445..29f03ce916 100644 --- a/ares/n64/cpu/cpu.cpp +++ b/ares/n64/cpu/cpu.cpp @@ -108,13 +108,15 @@ auto CPU::instruction() -> void { return; } - if(Accuracy::CPU::Recompiler && recompiler.enabled) { - if (auto address = devirtualize(ipu.pc)) { - auto block = recompiler.block(ipu.pc, *address, GDB::server.hasBreakpoints()); - block->execute(*this); - } + auto access = devirtualize(ipu.pc); + if(!access) return; + + if(Accuracy::CPU::Recompiler && recompiler.enabled && access.cache) { + if(vaddrAlignedError(access.vaddr, false)) return; + auto block = recompiler.block(ipu.pc, access.paddr, GDB::server.hasBreakpoints()); + block->execute(*this); } else { - auto data = fetch(ipu.pc); + auto data = fetch(access); if (!data) return; pipeline.begin(); instructionPrologue(ipu.pc, *data); diff --git a/ares/n64/cpu/cpu.hpp b/ares/n64/cpu/cpu.hpp index 6c42ec23f8..bddb7222a1 100644 --- a/ares/n64/cpu/cpu.hpp +++ b/ares/n64/cpu/cpu.hpp @@ -72,6 +72,17 @@ struct CPU : Thread { } } pipeline{*this}; + struct PhysAccess { + enum Direction : u32 { Read, Write }; + + explicit operator bool() const { return found; } + + bool found; //this is a valid physical access + bool cache; //access must go through cache + u32 paddr; //physical address on 32-bit MIPS bus + u64 vaddr; //virtual address used on the CPU (64-bit) + }; + //context.cpp struct Context { CPU& self; @@ -101,29 +112,27 @@ struct CPU : Thread { struct InstructionCache { CPU& self; struct Line; - auto line(u32 vaddr) -> Line& { return lines[vaddr >> 5 & 0x1ff]; } + auto line(u64 
vaddr) -> Line& { return lines[vaddr >> 5 & 0x1ff]; } //used by the recompiler to simulate instruction cache fetch timing - auto step(u32 vaddr, u32 address) -> void { + auto step(u64 vaddr, u32 paddr) -> void { auto& line = this->line(vaddr); - if(!line.hit(address)) { + if(!line.hit(paddr)) { self.step(48 * 2); line.valid = 1; - line.tag = address & ~0x0000'0fff; + line.tag = paddr & ~0x0000'0fff; } else { self.step(1 * 2); } } //used by the interpreter to fully emulate the instruction cache - auto fetch(u32 vaddr, u32 address, CPU& cpu) -> u32 { + auto fetch(u64 vaddr, u32 paddr, CPU& cpu) -> u32 { auto& line = this->line(vaddr); - if(!line.hit(address)) { - line.fill(address, cpu); - } else { - cpu.step(1 * 2); + if(!line.hit(paddr)) { + line.fill(paddr, cpu); } - return line.read(address); + return line.read(paddr); } auto power(bool reset) -> void { @@ -138,11 +147,11 @@ struct CPU : Thread { //16KB struct Line { - auto hit(u32 address) const -> bool { return valid && tag == (address & ~0x0000'0fff); } - auto fill(u32 address, CPU& cpu) -> void { + auto hit(u32 paddr) const -> bool { return valid && tag == (paddr & ~0x0000'0fff); } + auto fill(u32 paddr, CPU& cpu) -> void { cpu.step(48 * 2); valid = 1; - tag = address & ~0x0000'0fff; + tag = paddr & ~0x0000'0fff; cpu.busReadBurst(tag | index, words); } @@ -151,7 +160,7 @@ struct CPU : Thread { cpu.busWriteBurst(tag | index, words); } - auto read(u32 address) const -> u32 { return words[address >> 2 & 7]; } + auto read(u32 paddr) const -> u32 { return words[paddr >> 2 & 7]; } bool valid; u32 tag; @@ -163,21 +172,21 @@ struct CPU : Thread { //dcache.cpp struct DataCache { struct Line; - auto line(u32 vaddr) -> Line&; - template auto read(u32 vaddr, u32 address) -> u64; - template auto write(u32 vaddr, u32 address, u64 data) -> void; + auto line(u64 vaddr) -> Line&; + template auto read(u64 vaddr, u32 paddr) -> u64; + template auto write(u64 vaddr, u32 paddr, u64 data) -> void; auto power(bool reset) -> void; - 
auto readDebug(u32 vaddr, u32 address) -> u8; - auto writeDebug(u32 vaddr, u32 address, u8 value) -> void; + auto readDebug(u64 vaddr, u32 paddr) -> u8; + auto writeDebug(u64 vaddr, u32 paddr, u8 value) -> void; //8KB struct Line { - auto hit(u32 address) const -> bool; - auto fill(u32 address) -> void; + auto hit(u32 paddr) const -> bool; + auto fill(u32 paddr) -> void; auto writeBack() -> void; - template auto read(u32 address) const -> u64; - template auto write(u32 address, u64 data) -> void; + template auto read(u32 paddr) const -> u64; + template auto write(u32 paddr, u64 data) -> void; bool valid; u16 dirty; @@ -199,14 +208,6 @@ struct CPU : Thread { TLB(CPU& self) : self(self) {} static constexpr u32 Entries = 32; - struct Match { - explicit operator bool() const { return found; } - - bool found; - bool cache; - u32 address; - }; - struct Entry { //scc-tlb.cpp auto synchronize() -> void; @@ -228,12 +229,12 @@ struct CPU : Thread { } entry[TLB::Entries]; //tlb.cpp - auto load(u64 vaddr, bool noExceptions = false) -> Match; - auto load(u64 vaddr, const Entry& entry, bool noExceptions = false) -> maybe; + auto load(u64 vaddr, bool noExceptions = false) -> PhysAccess; + auto load(u64 vaddr, const Entry& entry, bool noExceptions = false) -> maybe; - auto loadFast(u64 vaddr) -> Match; - auto store(u64 vaddr) -> Match; - auto store(u64 vaddr, const Entry& entry) -> maybe; + auto loadFast(u64 vaddr) -> PhysAccess; + auto store(u64 vaddr, bool noExceptions = false) -> PhysAccess; + auto store(u64 vaddr, const Entry& entry, bool noExceptions = false) -> maybe; struct TlbCache { ; static constexpr int entries = 4; @@ -277,21 +278,28 @@ struct CPU : Thread { auto userSegment64(u64 vaddr) const -> Context::Segment; auto segment(u64 vaddr) -> Context::Segment; - auto devirtualize(u64 vaddr) -> maybe; + template auto devirtualize(u64 vaddr, bool raiseAlignedError = true, bool raiseExceptions = true) -> PhysAccess; alwaysinline auto devirtualizeFast(u64 vaddr) -> u64; auto 
devirtualizeDebug(u64 vaddr) -> u64; - auto fetch(u64 vaddr) -> maybe; + auto fetch(PhysAccess access) -> maybe; template auto busWrite(u32 address, u64 data) -> void; template auto busRead(u32 address) -> u64; template auto busWriteBurst(u32 address, u32 *data) -> void; template auto busReadBurst(u32 address, u32 *data) -> void; - template auto read(u64 vaddr) -> maybe; - template auto write(u64 vaddr, u64 data, bool alignedError=true) -> bool; + template auto read(PhysAccess access) -> maybe; + template auto write(PhysAccess access, u64 data) -> bool; + template auto read(u64 vaddr) -> maybe { + return read(devirtualize(vaddr)); + } + template auto write(u64 vaddr, u64 data, bool alignedError = true) -> bool { + return write(devirtualize(vaddr, alignedError), data); + } template auto vaddrAlignedError(u64 vaddr, bool write) -> bool; auto addressException(u64 vaddr) -> void; auto readDebug(u64 vaddr) -> u8; + template auto writeDebug(u64 vaddr, u64 data) -> bool; //serialization.cpp auto serialize(serializer&) -> void; diff --git a/ares/n64/cpu/dcache.cpp b/ares/n64/cpu/dcache.cpp index b871cd1146..31b4a39d07 100644 --- a/ares/n64/cpu/dcache.cpp +++ b/ares/n64/cpu/dcache.cpp @@ -1,12 +1,12 @@ -auto CPU::DataCache::Line::hit(u32 address) const -> bool { - return valid && tag == (address & ~0x0000'0fff); +auto CPU::DataCache::Line::hit(u32 paddr) const -> bool { + return valid && tag == (paddr & ~0x0000'0fff); } -auto CPU::DataCache::Line::fill(u32 address) -> void { +auto CPU::DataCache::Line::fill(u32 paddr) -> void { cpu.step(40 * 2); valid = 1; dirty = 0; - tag = address & ~0x0000'0fff; + tag = paddr & ~0x0000'0fff; fillPc = cpu.ipu.pc; cpu.busReadBurst(tag | index, words); } @@ -17,75 +17,75 @@ auto CPU::DataCache::Line::writeBack() -> void { cpu.busWriteBurst(tag | index, words); } -auto CPU::DataCache::line(u32 vaddr) -> Line& { +auto CPU::DataCache::line(u64 vaddr) -> Line& { return lines[vaddr >> 4 & 0x1ff]; } template -auto CPU::DataCache::Line::read(u32 
address) const -> u64 { - if constexpr(Size == Byte) { return bytes[address >> 0 & 15 ^ 3]; } - if constexpr(Size == Half) { return halfs[address >> 1 & 7 ^ 1]; } - if constexpr(Size == Word) { return words[address >> 2 & 3 ^ 0]; } +auto CPU::DataCache::Line::read(u32 paddr) const -> u64 { + if constexpr(Size == Byte) { return bytes[paddr >> 0 & 15 ^ 3]; } + if constexpr(Size == Half) { return halfs[paddr >> 1 & 7 ^ 1]; } + if constexpr(Size == Word) { return words[paddr >> 2 & 3 ^ 0]; } if constexpr(Size == Dual) { - u64 upper = words[address >> 2 & 2 | 0]; - u64 lower = words[address >> 2 & 2 | 1]; + u64 upper = words[paddr >> 2 & 2 | 0]; + u64 lower = words[paddr >> 2 & 2 | 1]; return upper << 32 | lower << 0; } } template -auto CPU::DataCache::Line::write(u32 address, u64 data) -> void { - if constexpr(Size == Byte) { bytes[address >> 0 & 15 ^ 3] = data; } - if constexpr(Size == Half) { halfs[address >> 1 & 7 ^ 1] = data; } - if constexpr(Size == Word) { words[address >> 2 & 3 ^ 0] = data; } +auto CPU::DataCache::Line::write(u32 paddr, u64 data) -> void { + if constexpr(Size == Byte) { bytes[paddr >> 0 & 15 ^ 3] = data; } + if constexpr(Size == Half) { halfs[paddr >> 1 & 7 ^ 1] = data; } + if constexpr(Size == Word) { words[paddr >> 2 & 3 ^ 0] = data; } if constexpr(Size == Dual) { - words[address >> 2 & 2 | 0] = data >> 32; - words[address >> 2 & 2 | 1] = data >> 0; + words[paddr >> 2 & 2 | 0] = data >> 32; + words[paddr >> 2 & 2 | 1] = data >> 0; } - dirty |= ((1 << Size) - 1) << (address & 0xF); + dirty |= ((1 << Size) - 1) << (paddr & 0xF); dirtyPc = cpu.ipu.pc; } template -auto CPU::DataCache::read(u32 vaddr, u32 address) -> u64 { +auto CPU::DataCache::read(u64 vaddr, u32 paddr) -> u64 { auto& line = this->line(vaddr); - if(!line.hit(address)) { + if(!line.hit(paddr)) { if(line.valid && line.dirty) line.writeBack(); - line.fill(address); + line.fill(paddr); } else { cpu.step(1 * 2); } - return line.read(address); + return line.read(paddr); } -auto 
CPU::DataCache::readDebug(u32 vaddr, u32 address) -> u8 { +auto CPU::DataCache::readDebug(u64 vaddr, u32 paddr) -> u8 { auto& line = this->line(vaddr); - if(!line.hit(address)) { + if(!line.hit(paddr)) { Thread dummyThread{}; - return bus.read(address, dummyThread, "Ares Debugger"); + return bus.read(paddr, dummyThread, "Ares Debugger"); } - return line.read(address); + return line.read(paddr); } template -auto CPU::DataCache::write(u32 vaddr, u32 address, u64 data) -> void { +auto CPU::DataCache::write(u64 vaddr, u32 paddr, u64 data) -> void { auto& line = this->line(vaddr); - if(!line.hit(address)) { + if(!line.hit(paddr)) { if(line.valid && line.dirty) line.writeBack(); - line.fill(address); + line.fill(paddr); } else { cpu.step(1 * 2); } - line.write(address, data); + line.write(paddr, data); } -auto CPU::DataCache::writeDebug(u32 vaddr, u32 address, u8 data) -> void { +auto CPU::DataCache::writeDebug(u64 vaddr, u32 paddr, u8 data) -> void { auto& line = this->line(vaddr); - if(!line.hit(address)) { + if(!line.hit(paddr)) { Thread dummyThread{}; - return bus.write(address, data, dummyThread, "Ares Debugger"); + return bus.write(paddr, data, dummyThread, "Ares Debugger"); } - line.write(address, data); + line.write(paddr, data); } auto CPU::DataCache::power(bool reset) -> void { @@ -100,4 +100,4 @@ auto CPU::DataCache::power(bool reset) -> void { } template -auto CPU::DataCache::Line::write(u32 address, u64 data) -> void; +auto CPU::DataCache::Line::write(u32 paddr, u64 data) -> void; diff --git a/ares/n64/cpu/interpreter-ipu.cpp b/ares/n64/cpu/interpreter-ipu.cpp index ed98f9a35f..d0b77213c9 100644 --- a/ares/n64/cpu/interpreter-ipu.cpp +++ b/ares/n64/cpu/interpreter-ipu.cpp @@ -118,27 +118,26 @@ auto CPU::BREAK() -> void { } auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void { - u32 address; - if (auto phys = devirtualize(rs.u64 + imm)) address = *phys; - else return; + auto access = devirtualize(rs.u64 + imm); + if (!access) return; switch(operation) { 
case 0x00: { //icache index invalidate - auto& line = icache.line(address); + auto& line = icache.line(access.vaddr); line.valid = 0; break; } case 0x04: { //icache load tag - auto& line = icache.line(address); + auto& line = icache.line(access.vaddr); scc.tagLo.primaryCacheState = line.valid << 1; scc.tagLo.physicalAddress = line.tag; break; } case 0x08: { //icache store tag - auto& line = icache.line(address); + auto& line = icache.line(access.vaddr); line.valid = scc.tagLo.primaryCacheState.bit(1); line.tag = scc.tagLo.physicalAddress; if(scc.tagLo.primaryCacheState == 0b01) debug(unusual, "[CPU] CACHE CPCS=1"); @@ -147,39 +146,39 @@ auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void { } case 0x10: { //icache hit invalidate - auto& line = icache.line(address); - if(line.hit(address)) line.valid = 0; + auto& line = icache.line(access.vaddr); + if(line.hit(access.paddr)) line.valid = 0; break; } case 0x14: { //icache fill - auto& line = icache.line(address); - line.fill(address, cpu); + auto& line = icache.line(access.vaddr); + line.fill(access.paddr, cpu); break; } case 0x18: { //icache hit write back - auto& line = icache.line(address); - if(line.hit(address)) line.writeBack(cpu); + auto& line = icache.line(access.vaddr); + if(line.hit(access.paddr)) line.writeBack(cpu); break; } case 0x01: { //dcache index write back invalidate - auto& line = dcache.line(address); + auto& line = dcache.line(access.vaddr); if(line.valid && line.dirty) line.writeBack(); line.valid = 0; break; } case 0x05: { //dcache index load tag - auto& line = dcache.line(address); + auto& line = dcache.line(access.vaddr); scc.tagLo.primaryCacheState = line.valid << 1 | line.dirty << 0; scc.tagLo.physicalAddress = line.tag; break; } case 0x09: { //dcache index store tag - auto& line = dcache.line(address); + auto& line = dcache.line(access.vaddr); line.valid = scc.tagLo.primaryCacheState.bit(1); line.dirty = scc.tagLo.primaryCacheState.bit(0); line.tag = scc.tagLo.physicalAddress; @@ 
-189,17 +188,17 @@ auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void { } case 0x0d: { //dcache create dirty exclusive - auto& line = dcache.line(address); - if(!line.hit(address) && line.dirty) line.writeBack(); - line.tag = address & ~0xfff; + auto& line = dcache.line(access.vaddr); + if(!line.hit(access.paddr) && line.dirty) line.writeBack(); + line.tag = access.paddr & ~0xfff; line.valid = 1; line.dirty = 1; break; } case 0x11: { //dcache hit invalidate - auto& line = dcache.line(address); - if(line.hit(address)) { + auto& line = dcache.line(access.vaddr); + if(line.hit(access.paddr)) { line.valid = 0; line.dirty = 0; } @@ -207,8 +206,8 @@ auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void { } case 0x15: { //dcache hit write back invalidate - auto& line = dcache.line(address); - if(line.hit(address)) { + auto& line = dcache.line(access.vaddr); + if(line.hit(access.paddr)) { if(line.dirty) line.writeBack(); line.valid = 0; } @@ -216,8 +215,8 @@ auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void { } case 0x19: { //dcache hit write back - auto& line = dcache.line(address); - if(line.hit(address)) { + auto& line = dcache.line(access.vaddr); + if(line.hit(access.paddr)) { if(line.dirty) line.writeBack(); } break; @@ -610,10 +609,10 @@ auto CPU::LHU(r64& rt, cr64& rs, s16 imm) -> void { } auto CPU::LL(r64& rt, cr64& rs, s16 imm) -> void { - if(auto address = devirtualize(rs.u64 + imm)) { - if (auto data = read(rs.u64 + imm)) { + if(auto access = devirtualize(rs.u64 + imm)) { + if (auto data = read(access.vaddr)) { rt.u64 = s32(*data); - scc.ll = *address >> 4; + scc.ll = access.paddr >> 4; scc.llbit = 1; } } @@ -621,10 +620,10 @@ auto CPU::LL(r64& rt, cr64& rs, s16 imm) -> void { auto CPU::LLD(r64& rt, cr64& rs, s16 imm) -> void { if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); - if(auto address = devirtualize(rs.u64 + imm)) { - if (auto data = read(rs.u64 + imm)) { + if(auto access = devirtualize(rs.u64 + 
imm)) { + if (auto data = read(access.vaddr)) { rt.u64 = *data; - scc.ll = *address >> 4; + scc.ll = access.paddr >> 4; scc.llbit = 1; } } diff --git a/ares/n64/cpu/memory.cpp b/ares/n64/cpu/memory.cpp index e319d26360..44fbc720ec 100644 --- a/ares/n64/cpu/memory.cpp +++ b/ares/n64/cpu/memory.cpp @@ -88,23 +88,36 @@ auto CPU::segment(u64 vaddr) -> Context::Segment { unreachable; } -auto CPU::devirtualize(u64 vaddr) -> maybe { - if(vaddrAlignedError(vaddr, false)) return nothing; +template +auto CPU::devirtualize(u64 vaddr, bool raiseAlignedError, bool raiseExceptions) -> PhysAccess { + if (raiseAlignedError && vaddrAlignedError(vaddr, Dir == Write)) { + return PhysAccess{false}; + } + //fast path for RDRAM, which is by far the most accessed memory region + if (vaddr >= 0xffff'ffff'8000'0000ull && vaddr <= 0xffff'ffff'83ef'ffffull) { + if constexpr(Dir == Read) return PhysAccess{true, true, (u32)vaddr & 0x3eff'ffff, vaddr}; + if constexpr(Dir == Write) return PhysAccess{true, true, (u32)vaddr & 0x3eff'ffff, vaddr}; + } switch(segment(vaddr)) { case Context::Segment::Unused: - addressException(vaddr); - exception.addressLoad(); - return nothing; + if(raiseExceptions) { + addressException(vaddr); + if constexpr(Dir == Read) exception.addressLoad(); + if constexpr(Dir == Write) exception.addressStore(); + } + return PhysAccess{false}; case Context::Segment::Mapped: - if(auto match = tlb.load(vaddr)) return match.address & context.physMask; - addressException(vaddr); - return nothing; + if constexpr(Dir == Read) if(auto access = tlb.load (vaddr, !raiseExceptions)) return access; + if constexpr(Dir == Write) if(auto access = tlb.store(vaddr, !raiseExceptions)) return access; + return PhysAccess{false}; case Context::Segment::Cached: + return PhysAccess{true, true, (u32)(vaddr & 0x1fff'ffff), vaddr}; case Context::Segment::Direct: - return vaddr & 0x1fff'ffff; + return PhysAccess{true, false, (u32)(vaddr & 0x1fff'ffff), vaddr}; case Context::Segment::Cached32: + return 
PhysAccess{true, true, (u32)(vaddr & 0xffff'ffff), vaddr}; case Context::Segment::Direct32: - return vaddr & 0xffff'ffff; + return PhysAccess{true, false, (u32)(vaddr & 0xffff'ffff), vaddr}; } unreachable; } @@ -128,7 +141,7 @@ auto CPU::devirtualizeFast(u64 vaddr) -> u64 { switch(segment(vaddr)) { case Context::Segment::Mapped: { auto match = tlb.loadFast(vaddr); - return devirtualizeCache.pbase = match.address & context.physMask; + return devirtualizeCache.pbase = match.paddr & context.physMask; } case Context::Segment::Cached: case Context::Segment::Direct: @@ -164,132 +177,45 @@ inline auto CPU::busReadBurst(u32 address, u32 *data) -> void { return bus.readBurst(address, data, *this); } -auto CPU::fetch(u64 vaddr) -> maybe { - if(vaddrAlignedError(vaddr, false)) return nothing; - switch(segment(vaddr)) { - case Context::Segment::Unused: - step(1 * 2); - addressException(vaddr); - exception.addressLoad(); - return nothing; - case Context::Segment::Mapped: - if(auto match = tlb.load(vaddr)) { - if(match.cache) return icache.fetch(vaddr, match.address & context.physMask, cpu); - step(1 * 2); - return busRead(match.address & context.physMask); - } - step(1 * 2); - addressException(vaddr); - return nothing; - case Context::Segment::Cached: - return icache.fetch(vaddr, vaddr & 0x1fff'ffff, cpu); - case Context::Segment::Cached32: - return icache.fetch(vaddr, vaddr & 0xffff'ffff, cpu); - case Context::Segment::Direct: - step(1 * 2); - return busRead(vaddr & 0x1fff'ffff); - case Context::Segment::Direct32: - step(1 * 2); - return busRead(vaddr & 0xffff'ffff); - } - - unreachable; +auto CPU::fetch(PhysAccess access) -> maybe { + step(1 * 2); + if(!access) return nothing; + if(access.cache) return icache.fetch(access.vaddr, access.paddr, cpu); + return busRead(access.paddr); } template -auto CPU::read(u64 vaddr) -> maybe { - if(vaddrAlignedError(vaddr, false)) return nothing; - GDB::server.reportMemRead(vaddr, Size); - - switch(segment(vaddr)) { - case 
Context::Segment::Unused: - step(1 * 2); - addressException(vaddr); - exception.addressLoad(); - return nothing; - case Context::Segment::Mapped: - if(auto match = tlb.load(vaddr)) { - if(match.cache) return dcache.read(vaddr, match.address & context.physMask); - step(1 * 2); - return busRead(match.address & context.physMask); - } - step(1 * 2); - addressException(vaddr); - return nothing; - case Context::Segment::Cached: - return dcache.read(vaddr, vaddr & 0x1fff'ffff); - case Context::Segment::Cached32: - return dcache.read(vaddr, vaddr & 0xffff'ffff); - case Context::Segment::Direct: - step(1 * 2); - return busRead(vaddr & 0x1fff'ffff); - case Context::Segment::Direct32: - step(1 * 2); - return busRead(vaddr & 0xffff'ffff); - } - - unreachable; +auto CPU::read(PhysAccess access) -> maybe { + if(!access) return nothing; + GDB::server.reportMemRead(access.vaddr, Size); + if(access.cache) return dcache.read(access.vaddr, access.paddr); + return busRead(access.paddr); } auto CPU::readDebug(u64 vaddr) -> u8 { Thread dummyThread{}; + auto access = devirtualize(vaddr, false, false); + if(!access) return 0; + if(access.cache) return dcache.readDebug(access.vaddr, access.paddr); + return bus.read(access.paddr, dummyThread, "Ares Debugger"); +} - switch(segment(vaddr)) { - case Context::Segment::Unused: return 0; - case Context::Segment::Mapped: - if(auto match = tlb.load(vaddr, true)) { - if(match.cache) return dcache.readDebug(vaddr, match.address & context.physMask); - return bus.read(match.address & context.physMask, dummyThread, "Ares Debugger"); - } - return 0; - case Context::Segment::Cached: - return dcache.readDebug(vaddr, vaddr & 0x1fff'ffff); - case Context::Segment::Cached32: - return dcache.readDebug(vaddr, vaddr & 0xffff'ffff); - case Context::Segment::Direct: - return bus.read(vaddr & 0x1fff'ffff, dummyThread, "Ares Debugger"); - case Context::Segment::Direct32: - return bus.read(vaddr & 0xffff'ffff, dummyThread, "Ares Debugger"); - } - unreachable; 
+template +auto CPU::write(PhysAccess access, u64 data) -> bool { + if(!access) return false; + GDB::server.reportMemWrite(access.vaddr, Size); + if(access.cache) return dcache.write(access.vaddr, access.paddr, data), true; + return busWrite(access.paddr, data), true; } template -auto CPU::write(u64 vaddr0, u64 data, bool alignedError) -> bool { - if(alignedError && vaddrAlignedError(vaddr0, true)) return false; - u64 vaddr = vaddr0 & ~((u64)Size - 1); - - GDB::server.reportMemWrite(vaddr0, Size); - - switch(segment(vaddr)) { - case Context::Segment::Unused: - step(1 * 2); - addressException(vaddr0); - exception.addressStore(); - return false; - case Context::Segment::Mapped: - if(auto match = tlb.store(vaddr)) { - if(match.cache) return dcache.write(vaddr, match.address & context.physMask, data), true; - step(1 * 2); - return busWrite(match.address & context.physMask, data), true; - } - step(1 * 2); - addressException(vaddr0); - return false; - case Context::Segment::Cached: - return dcache.write(vaddr, vaddr & 0x1fff'ffff, data), true; - case Context::Segment::Cached32: - return dcache.write(vaddr, vaddr & 0xffff'ffff, data), true; - case Context::Segment::Direct: - step(1 * 2); - return busWrite(vaddr & 0x1fff'ffff, data), true; - case Context::Segment::Direct32: - step(1 * 2); - return busWrite(vaddr & 0xffff'ffff, data), true; - } - - unreachable; +auto CPU::writeDebug(u64 vaddr, u64 data) -> bool { + auto access = devirtualize(vaddr, false, false); + if(!access) return false; + GDB::server.reportMemWrite(access.vaddr, Size); + if(access.cache) return dcache.write(access.vaddr, access.paddr, data), true; + return busWrite(access.paddr, data), true; } template @@ -321,3 +247,8 @@ auto CPU::addressException(u64 vaddr) -> void { scc.xcontext.badVirtualAddress = vaddr >> 13; scc.xcontext.region = vaddr >> 62; } + +template auto CPU::writeDebug(u64, u64) -> bool; +template auto CPU::writeDebug(u64, u64) -> bool; +template auto CPU::writeDebug(u64, u64) -> bool; 
+template auto CPU::writeDebug(u64, u64) -> bool; diff --git a/ares/n64/cpu/tlb.cpp b/ares/n64/cpu/tlb.cpp index d596179a09..088ecdb8a4 100644 --- a/ares/n64/cpu/tlb.cpp +++ b/ares/n64/cpu/tlb.cpp @@ -1,23 +1,23 @@ -auto CPU::TLB::load(u64 vaddr, const Entry& entry, bool noExceptions) -> maybe { +auto CPU::TLB::load(u64 vaddr, const Entry& entry, bool noExceptions) -> maybe { if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) return nothing; if((vaddr & entry.addressMaskHi) != entry.virtualAddress) return nothing; if(vaddr >> 62 != entry.region) return nothing; bool lo = vaddr & entry.addressSelect; if(!entry.valid[lo]) { - if(noExceptions)return Match{false}; + if(noExceptions)return PhysAccess{false}; self.addressException(vaddr); self.debugger.tlbLoadInvalid(vaddr); self.exception.tlbLoadInvalid(); - return Match{false}; + return PhysAccess{false}; } physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo); self.debugger.tlbLoad(vaddr, physicalAddress); - return Match{true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; + return PhysAccess{true, entry.cacheAlgorithm[lo] != 2, physicalAddress, vaddr}; } -auto CPU::TLB::load(u64 vaddr, bool noExceptions) -> Match { +auto CPU::TLB::load(u64 vaddr, bool noExceptions) -> PhysAccess { for(auto& entry : this->tlbCache.entry) { if(!entry.entry) continue; if(auto match = load(vaddr, *entry.entry, noExceptions)) { @@ -43,7 +43,7 @@ auto CPU::TLB::load(u64 vaddr, bool noExceptions) -> Match { // Fast(er) version of load for recompiler icache lookups // avoids exceptions/debug checks -auto CPU::TLB::loadFast(u64 vaddr) -> Match { +auto CPU::TLB::loadFast(u64 vaddr) -> PhysAccess { for(auto& entry : this->entry) { if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue; if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue; @@ -51,35 +51,39 @@ auto CPU::TLB::loadFast(u64 vaddr) -> Match { bool lo = vaddr & entry.addressSelect; 
if(!entry.valid[lo]) return { false, 0, 0 }; physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo); - return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; + return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress, vaddr}; } return {false, 0, 0}; } -auto CPU::TLB::store(u64 vaddr, const Entry& entry) -> maybe { +auto CPU::TLB::store(u64 vaddr, const Entry& entry, bool noExceptions) -> maybe { if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) return nothing; if((vaddr & entry.addressMaskHi) != entry.virtualAddress) return nothing; if(vaddr >> 62 != entry.region) return nothing; bool lo = vaddr & entry.addressSelect; if(!entry.valid[lo]) { - self.addressException(vaddr); - self.debugger.tlbStoreInvalid(vaddr); - self.exception.tlbStoreInvalid(); - return Match{false}; + if(!noExceptions) { + self.addressException(vaddr); + self.debugger.tlbStoreInvalid(vaddr); + self.exception.tlbStoreInvalid(); + } + return PhysAccess{false}; } if(!entry.dirty[lo]) { - self.addressException(vaddr); - self.debugger.tlbModification(vaddr); - self.exception.tlbModification(); - return Match{false}; + if(!noExceptions) { + self.addressException(vaddr); + self.debugger.tlbModification(vaddr); + self.exception.tlbModification(); + } + return PhysAccess{false}; } physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo); self.debugger.tlbStore(vaddr, physicalAddress); - return Match{true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; + return PhysAccess{true, entry.cacheAlgorithm[lo] != 2, physicalAddress, vaddr}; } -auto CPU::TLB::store(u64 vaddr) -> Match { +auto CPU::TLB::store(u64 vaddr, bool noExceptions) -> PhysAccess { for(auto& entry : this->tlbCache.entry) { if(!entry.entry) continue; if(auto match = store(vaddr, *entry.entry)) { @@ -89,15 +93,17 @@ auto CPU::TLB::store(u64 vaddr) -> Match { } for(auto& entry : this->entry) { - if(auto match = store(vaddr, entry)) { + if(auto match = store(vaddr, 
entry, noExceptions)) { this->tlbCache.insert(entry); return *match; } } - self.addressException(vaddr); - self.debugger.tlbStoreMiss(vaddr); - self.exception.tlbStoreMiss(); + if(!noExceptions) { + self.addressException(vaddr); + self.debugger.tlbStoreMiss(vaddr); + self.exception.tlbStoreMiss(); + } return {false}; } diff --git a/ares/n64/system/system.cpp b/ares/n64/system/system.cpp index 109f5c99f4..92b1bf748e 100644 --- a/ares/n64/system/system.cpp +++ b/ares/n64/system/system.cpp @@ -158,20 +158,20 @@ auto System::initDebugHooks() -> void { switch(data.size()) { case Byte: value = (u64)data[0]; - cpu.write(address, value, false); + cpu.writeDebug(address, value); break; case Half: value = ((u64)data[0]<<8) | ((u64)data[1]<<0); - cpu.write(address, value, false); + cpu.writeDebug(address, value); break; case Word: value = ((u64)data[0]<<24) | ((u64)data[1]<<16) | ((u64)data[2]<<8) | ((u64)data[3]<<0); - cpu.write(address, value, false); + cpu.writeDebug(address, value); break; case Dual: value = ((u64)data[0]<<56) | ((u64)data[1]<<48) | ((u64)data[2]<<40) | ((u64)data[3]<<32); value |= ((u64)data[4]<<24) | ((u64)data[5]<<16) | ((u64)data[6]<< 8) | ((u64)data[7]<< 0); - cpu.write(address, value, false); + cpu.writeDebug(address, value); break; default: // Handle writes of different sizes only within the RDRAM area, where diff --git a/nall/gdb/server.cpp b/nall/gdb/server.cpp index 39ce11323c..2c415f2cf4 100644 --- a/nall/gdb/server.cpp +++ b/nall/gdb/server.cpp @@ -193,7 +193,7 @@ namespace nall::GDB { u64 address = cmdName.slice(1, sepIdx-1).hex(); u64 byteSize = cmdName.slice(sepIdx+1, 1).hex(); - string hexvalue = cmdParts.size() > 1 ? cmdParts[1] : ""; + string hexvalue = cmdParts.size() > 1 ? cmdParts[1] : string{}; vector value; string hexbyte; for (int i=0; i