-
Notifications
You must be signed in to change notification settings - Fork 130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add N64 recompiler block hashes & inline 64-bit ops #1640
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,9 @@ auto CPU::Context::setMode() -> void { | |
break; | ||
} | ||
|
||
jit.update(*this, self); | ||
jitBits = jit.toBits(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should update() be responsible for recalculating jitBits? |
||
|
||
if(bits == 32) { | ||
physMask = 0x1fff'ffff; | ||
segment[0] = Segment::Mapped; | ||
|
@@ -63,3 +66,22 @@ auto CPU::Context::setMode() -> void { | |
} | ||
} | ||
} | ||
|
||
auto CPU::Context::JIT::update(const Context& ctx, const CPU& cpu) -> void { | ||
singleInstruction = GDB::server.hasBreakpoints(); | ||
endian = Context::Endian(ctx.endian); | ||
mode = Context::Mode(ctx.mode); | ||
cop1Enabled = cpu.scc.status.enable.coprocessor1 > 0; | ||
floatingPointMode = cpu.scc.status.floatingPointMode > 0; | ||
is64bit = ctx.bits == 64; | ||
} | ||
|
||
//packs the snapshot into a bitfield that distinguishes blocks compiled under different CPU state.
//bits 0-5 are intentionally left clear: computePoolKey() masks them off and stores the
//block's slot index there instead.
//layout: 6 = singleInstruction, 7 = endian, 8-9 = mode, 10 = cop1Enabled,
//        11 = floatingPointMode, 12 = is64bit
auto CPU::Context::JIT::toBits() const -> u32 {
  u32 bits = 0;
  bits |= singleInstruction ? 1 << 6 : 0;
  bits |= endian ? 1 << 7 : 0;
  //mode is two bits wide; it must sit at bits 8-9 so that it does not overlap cop1Enabled
  //at bit 10 (shifting by 9 made User mode indistinguishable from "coprocessor 1 enabled")
  bits |= (mode & 0x03) << 8;
  bits |= cop1Enabled ? 1 << 10 : 0;
  bits |= floatingPointMode ? 1 << 11 : 0;
  bits |= is64bit ? 1 << 12 : 0;
  return bits;
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,6 +92,18 @@ struct CPU : Thread { | |
enum Mode : u32 { Kernel, Supervisor, User }; | ||
enum Segment : u32 { Unused, Mapped, Cached, Direct, Cached32, Direct32, Kernel64, Supervisor64, User64 }; | ||
|
||
struct JIT { | ||
bool singleInstruction; | ||
Endian endian; | ||
Mode mode; | ||
bool cop1Enabled; | ||
bool floatingPointMode; | ||
bool is64bit; | ||
|
||
auto update(const Context& ctx, const CPU& cpu) -> void; | ||
auto toBits() const -> u32; | ||
}; | ||
|
||
auto littleEndian() const -> bool { return endian == Endian::Little; } | ||
auto bigEndian() const -> bool { return endian == Endian::Big; } | ||
|
||
|
@@ -106,6 +118,8 @@ struct CPU : Thread { | |
u32 mode; | ||
u32 bits; | ||
u32 segment[8]; //512_MiB chunks | ||
u32 jitBits; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The rest of the recompiler state is in the recompiler object. Is there a reason you want to keep this state separate? |
||
Context::JIT jit; | ||
} context{*this}; | ||
|
||
//icache.cpp | ||
|
@@ -863,7 +877,11 @@ struct CPU : Thread { | |
}; | ||
|
||
struct Pool { | ||
Block* blocks[1 << 6]; | ||
struct Row { | ||
Block* block; | ||
u32 tag; | ||
}; | ||
Row rows[1 << 6]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How did you arrive at 64 rows? Were other numbers tried? |
||
}; | ||
|
||
auto reset() -> void { | ||
|
@@ -899,12 +917,16 @@ struct CPU : Thread { | |
} | ||
|
||
auto pool(u32 address) -> Pool*; | ||
auto block(u64 vaddr, u32 address, bool singleInstruction = false) -> Block*; | ||
auto computePoolKey(u32 address, u32 ctxHash) -> u32; | ||
auto computePoolRow(u32 key) -> u32; | ||
auto block(u64 vaddr, u32 address, const Context& ctx) -> Block*; | ||
|
||
auto emit(u64 vaddr, u32 address, bool singleInstruction = false) -> Block*; | ||
auto emit(u64 vaddr, u32 address, Context::JIT ctx) -> Block*; | ||
auto emitOverflowCheck(reg temp) -> sljit_jump*; | ||
auto emitZeroClear(u32 n) -> void; | ||
auto emitEXECUTE(u32 instruction) -> bool; | ||
auto emitSPECIAL(u32 instruction) -> bool; | ||
auto checkDualAllowed(const Context::JIT& ctx) -> bool; | ||
auto emitEXECUTE(u32 instruction, Context::JIT ctx) -> bool; | ||
auto emitSPECIAL(u32 instruction, Context::JIT ctx) -> bool; | ||
auto emitREGIMM(u32 instruction) -> bool; | ||
auto emitSCC(u32 instruction) -> bool; | ||
auto emitFPU(u32 instruction) -> bool; | ||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -9,10 +9,34 @@ auto CPU::Recompiler::pool(u32 address) -> Pool* { | |||||||
return pool; | ||||||||
} | ||||||||
|
||||||||
auto CPU::Recompiler::block(u64 vaddr, u32 address, bool singleInstruction) -> Block* { | ||||||||
if(auto block = pool(address)->blocks[address >> 2 & 0x3f]) return block; | ||||||||
auto block = emit(vaddr, address, singleInstruction); | ||||||||
pool(address)->blocks[address >> 2 & 0x3f] = block; | ||||||||
//builds the tag that identifies a compiled block inside its pool.
//address: physical address of the block; bits 2-7 select one of 64 word slots.
//jitBits: packed context state; its low six bits are discarded to make room for the slot.
//returns the slot index in bits 0-5 combined with the remaining context bits.
auto CPU::Recompiler::computePoolKey(u32 address, u32 jitBits) -> u32 {
  const u32 slotMask = 0x3f;
  const u32 slot = (address >> 2) & slotMask;
  const u32 contextBits = jitBits & ~slotMask;
  return slot | contextBits;
}
|
||||||||
auto CPU::Recompiler::computePoolRow(u32 key) -> u32 { | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The n64 core already pulls in xxhash.h, so XXH32_avalanche might work here as well. |
||||||||
// Jon Maiga's 'xmx' mixer, see https://jonkagstrom.com/bit-mixer-construction/ | ||||||||
u64 x = key; | ||||||||
x ^= x >> 23; | ||||||||
x *= 0xff51afd7ed558ccdull; | ||||||||
x ^= x >> 23; | ||||||||
u32 row = x & 0x3f; | ||||||||
assert(row < sizeof(Pool::rows)/sizeof(Pool::rows[0])); | ||||||||
return row; | ||||||||
} | ||||||||
|
||||||||
//returns the compiled block for (address, context), compiling it on a miss.
//vaddr: virtual address forwarded to emit().
//address: physical address used to locate the pool and to derive the key.
//ctx: supplies jitBits for the key and the JIT snapshot handed to emit().
auto CPU::Recompiler::block(u64 vaddr, u32 address, const Context& ctx) -> Block* {
  const u32 key = computePoolKey(address, ctx.jitBits);
  const u32 row = computePoolRow(key);

  //hit: the row was filled for exactly this key and still holds a block
  const auto& cached = pool(address)->rows[row];
  if(cached.tag == key && cached.block) return cached.block;

  //miss: compile with JIT memory writable, then publish the result in the row
  memory::jitprotect(false);
  auto compiled = emit(vaddr, address, ctx.jit);
  //the pool is looked up again after emit() instead of reusing the earlier reference:
  //emit() can release the allocator, which presumably invalidates pools -- TODO confirm
  auto& slot = pool(address)->rows[row];
  slot.block = compiled;
  slot.tag = key;
  memory::jitprotect(true);
  return compiled;
}
|
@@ -21,7 +45,7 @@ auto CPU::Recompiler::block(u64 vaddr, u32 address, bool singleInstruction) -> B | |||||||
#define IpuReg(r) sreg(1), offsetof(IPU, r) - IpuBase | ||||||||
#define PipelineReg(x) mem(sreg(0), offsetof(CPU, pipeline) + offsetof(Pipeline, x)) | ||||||||
|
||||||||
auto CPU::Recompiler::emit(u64 vaddr, u32 address, bool singleInstruction) -> Block* { | ||||||||
auto CPU::Recompiler::emit(u64 vaddr, u32 address, Context::JIT ctx) -> Block* { | ||||||||
if(unlikely(allocator.available() < 1_MiB)) { | ||||||||
print("CPU allocator flush\n"); | ||||||||
allocator.release(); | ||||||||
|
@@ -46,7 +70,7 @@ auto CPU::Recompiler::emit(u64 vaddr, u32 address, bool singleInstruction) -> Bl | |||||||
mov32(reg(2), imm(instruction)); | ||||||||
call(&CPU::instructionPrologue); | ||||||||
} | ||||||||
bool branched = emitEXECUTE(instruction); | ||||||||
bool branched = emitEXECUTE(instruction, ctx); | ||||||||
if(unlikely(instruction == branchToSelf || instruction == jumpToSelf)) { | ||||||||
//accelerate idle loops | ||||||||
mov32(reg(1), imm(64 * 2)); | ||||||||
|
@@ -60,7 +84,7 @@ auto CPU::Recompiler::emit(u64 vaddr, u32 address, bool singleInstruction) -> Bl | |||||||
vaddr += 4; | ||||||||
address += 4; | ||||||||
jumpToSelf += 4; | ||||||||
if(hasBranched || (address & 0xfc) == 0 || singleInstruction) break; //block boundary | ||||||||
if(hasBranched || (address & 0xfc) == 0 || ctx.singleInstruction) break; //block boundary | ||||||||
hasBranched = branched; | ||||||||
jumpEpilog(flag_nz); | ||||||||
} | ||||||||
|
@@ -103,12 +127,31 @@ auto CPU::Recompiler::emitZeroClear(u32 n) -> void { | |||||||
if(n == 0) mov64(mem(IpuReg(r[0])), imm(0)); | ||||||||
} | ||||||||
|
||||||||
auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool { | ||||||||
auto CPU::Recompiler::emitOverflowCheck(reg temp) -> sljit_jump* { | ||||||||
// If overflow flag set: throw an exception, skip the instruction via the 'end' label. | ||||||||
mov32_f(temp, flag_o); | ||||||||
auto didntOverflow = cmp32_jump(temp, imm(0), flag_eq); | ||||||||
Comment on lines
+132
to
+133
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
call(&CPU::Exception::arithmeticOverflow, &cpu.exception); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Emitting calls this way causes all the parameters to be emitted as immediates. In general it's cheaper (in terms of code footprint) to calculate addresses that are passed as arguments (see instances of |
||||||||
auto end = jump(); | ||||||||
setLabel(didntOverflow); | ||||||||
return end; | ||||||||
} | ||||||||
|
||||||||
//compile-time gate used by the inlined 64-bit instruction emitters.
//returns true when the snapshot is in kernel mode or has 64-bit addressing enabled;
//otherwise emits a call to the reserved-instruction exception handler and returns false,
//in which case the caller emits nothing further for the instruction.
auto CPU::Recompiler::checkDualAllowed(const Context::JIT& ctx) -> bool {
  const bool allowed = ctx.mode == Context::Mode::Kernel || ctx.is64bit;
  if(allowed) return true;

  call(&CPU::Exception::reservedInstruction, &self.exception);
  return false;
}
|
||||||||
auto CPU::Recompiler::emitEXECUTE(u32 instruction, Context::JIT ctx) -> bool { | ||||||||
switch(instruction >> 26) { | ||||||||
|
||||||||
//SPECIAL | ||||||||
case 0x00: { | ||||||||
return emitSPECIAL(instruction); | ||||||||
return emitSPECIAL(instruction, ctx); | ||||||||
} | ||||||||
|
||||||||
//REGIMM | ||||||||
|
@@ -284,21 +327,19 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool { | |||||||
|
||||||||
//DADDI Rt,Rs,i16 | ||||||||
case 0x18: { | ||||||||
lea(reg(1), Rt); | ||||||||
lea(reg(2), Rs); | ||||||||
mov32(reg(3), imm(i16)); | ||||||||
call(&CPU::DADDI); | ||||||||
emitZeroClear(Rtn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
add64(reg(0), mem(Rs), imm(i16), set_o); | ||||||||
auto skip = emitOverflowCheck(reg(2)); | ||||||||
if(Rtn > 0) mov64(mem(Rt), reg(0)); | ||||||||
setLabel(skip); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
//DADDIU Rt,Rs,i16 | ||||||||
case 0x19: { | ||||||||
lea(reg(1), Rt); | ||||||||
lea(reg(2), Rs); | ||||||||
mov32(reg(3), imm(i16)); | ||||||||
call(&CPU::DADDIU); | ||||||||
emitZeroClear(Rtn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
add64(reg(0), mem(Rs), imm(i16), set_o); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set_o not needed |
||||||||
if(Rtn > 0) mov64(mem(Rt), reg(0)); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The add64 can also be skipped if Rtn is zero. |
||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
|
@@ -616,7 +657,7 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool { | |||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { | ||||||||
auto CPU::Recompiler::emitSPECIAL(u32 instruction, Context::JIT ctx) -> bool { | ||||||||
switch(instruction & 0x3f) { | ||||||||
|
||||||||
//SLL Rd,Rt,Sa | ||||||||
|
@@ -760,11 +801,10 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { | |||||||
|
||||||||
//DSLLV Rd,Rt,Rs | ||||||||
case 0x14: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rt); | ||||||||
lea(reg(3), Rs); | ||||||||
call(&CPU::DSLLV); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
if (Rdn == 0) return 0; | ||||||||
and64(reg(0), mem(Rs32), imm(63)); | ||||||||
shl64(mem(Rd), mem(Rt), reg(0)); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
|
@@ -776,21 +816,19 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { | |||||||
|
||||||||
//DSRLV Rd,Rt,Rs | ||||||||
case 0x16: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rt); | ||||||||
lea(reg(3), Rs); | ||||||||
call(&CPU::DSRLV); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
if (Rdn == 0) return 0; | ||||||||
and64(reg(0), mem(Rs32), imm(63)); | ||||||||
lshr64(mem(Rd), mem(Rt), reg(0)); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
//DSRAV Rd,Rt,Rs | ||||||||
case 0x17: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rt); | ||||||||
lea(reg(3), Rs); | ||||||||
call(&CPU::DSRAV); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
if (Rdn == 0) return 0; | ||||||||
and64(reg(0), mem(Rs32), imm(63)); | ||||||||
ashr64(mem(Rd), mem(Rt), reg(0)); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
|
@@ -950,41 +988,42 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { | |||||||
|
||||||||
//DADD Rd,Rs,Rt | ||||||||
case 0x2c: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rs); | ||||||||
lea(reg(3), Rt); | ||||||||
call(&CPU::DADD); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
add64(reg(0), mem(Rs), mem(Rt), set_o); | ||||||||
auto skip = emitOverflowCheck(reg(2)); | ||||||||
if(Rdn > 0) mov64(mem(Rd), reg(0)); | ||||||||
setLabel(skip); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
//DADDU Rd,Rs,Rt | ||||||||
case 0x2d: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rs); | ||||||||
lea(reg(3), Rt); | ||||||||
call(&CPU::DADDU); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) { | ||||||||
return 1; | ||||||||
} | ||||||||
|
||||||||
if(Rdn == 0) return 0; | ||||||||
|
||||||||
add64(reg(0), mem(Rs), mem(Rt)); | ||||||||
mov64(mem(Rd), reg(0)); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
//DSUB Rd,Rs,Rt | ||||||||
case 0x2e: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rs); | ||||||||
lea(reg(3), Rt); | ||||||||
call(&CPU::DSUB); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
sub64(reg(0), mem(Rs), mem(Rt), set_o); | ||||||||
auto skip = emitOverflowCheck(reg(2)); | ||||||||
if(Rdn > 0) mov64(mem(Rd), reg(0)); | ||||||||
setLabel(skip); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
//DSUBU Rd,Rs,Rt | ||||||||
case 0x2f: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rs); | ||||||||
lea(reg(3), Rt); | ||||||||
call(&CPU::DSUBU); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
sub64(reg(0), mem(Rs), mem(Rt), set_o); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set_o not needed |
||||||||
if(Rdn > 0) mov64(mem(Rd), reg(0)); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The sub64 can be skipped if Rdn is zero. |
||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
|
@@ -1050,11 +1089,9 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { | |||||||
|
||||||||
//DSLL Rd,Rt,Sa | ||||||||
case 0x38: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rt); | ||||||||
mov32(reg(3), imm(Sa)); | ||||||||
call(&CPU::DSLL); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
if (Rdn == 0) return 0; | ||||||||
shl64(mem(Rd), mem(Rt), imm(Sa)); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
|
@@ -1076,21 +1113,17 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { | |||||||
|
||||||||
//DSRA Rd,Rt,Sa | ||||||||
case 0x3b: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rt); | ||||||||
mov32(reg(3), imm(Sa)); | ||||||||
call(&CPU::DSRA); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
if (Rdn == 0) return 0; | ||||||||
ashr64(mem(Rd), mem(Rt), imm(Sa)); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
//DSLL32 Rd,Rt,Sa | ||||||||
case 0x3c: { | ||||||||
lea(reg(1), Rd); | ||||||||
lea(reg(2), Rt); | ||||||||
mov32(reg(3), imm(Sa+32)); | ||||||||
call(&CPU::DSLL); | ||||||||
emitZeroClear(Rdn); | ||||||||
if (!checkDualAllowed(ctx)) return 1; | ||||||||
if (Rdn == 0) return 0; | ||||||||
shl64(mem(Rd), mem(Rt), imm(Sa+32)); | ||||||||
return 0; | ||||||||
} | ||||||||
|
||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We also need to make sure this is updated after a save state load.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also when the breakpoint count changes.