From 4c16397cf4b5b24916e73f36c05d4554c9948a38 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 4 Nov 2022 01:31:13 +0100 Subject: [PATCH 01/64] Latte: Fix race condition on close during game boot --- src/Cafe/HW/Latte/Core/LatteThread.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Cafe/HW/Latte/Core/LatteThread.cpp b/src/Cafe/HW/Latte/Core/LatteThread.cpp index 8874ecf45..295057da7 100644 --- a/src/Cafe/HW/Latte/Core/LatteThread.cpp +++ b/src/Cafe/HW/Latte/Core/LatteThread.cpp @@ -235,6 +235,8 @@ void Latte_Start() void Latte_Stop() { std::unique_lock _lock(sLatteThreadStateMutex); + if (!sLatteThreadRunning) + return; sLatteThreadRunning = false; _lock.unlock(); sLatteThread.join(); From f523b2152d1bb9b505743c17ee542fa08f4adb70 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 4 Nov 2022 01:35:12 +0100 Subject: [PATCH 02/64] PPCRec: Use vector for segment list + deduplicate RA file --- src/Cafe/CMakeLists.txt | 2 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 3 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 7 +- .../Recompiler/PPCRecompilerImlGen.cpp | 246 +++++----- .../Recompiler/PPCRecompilerImlOptimizer.cpp | 115 +++-- .../PPCRecompilerImlRegisterAllocator.cpp | 440 +++++++++++++++++- .../PPCRecompilerImlRegisterAllocator2.cpp | 414 ---------------- .../Recompiler/PPCRecompilerIntermediate.cpp | 27 +- .../Espresso/Recompiler/PPCRecompilerX64.cpp | 17 +- 9 files changed, 615 insertions(+), 656 deletions(-) delete mode 100644 src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 91d257b2e..58b4ba8bc 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -74,7 +74,7 @@ add_library(CemuCafe HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp HW/Espresso/Recompiler/PPCRecompilerImlRanges.h - HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp + HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 24e87bd16..78d8fad99 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -173,9 +173,8 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // collect list of PPC-->x64 entry points entryPointsOut.clear(); - for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++) + for(PPCRecImlSegment_t* imlSegment : ppcImlGenContext.segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; if (imlSegment->isEnterable == false) continue; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 2e40f19d2..4c07cfaad 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -321,9 +321,10 @@ struct ppcImlGenContext_t sint32 imlListSize; sint32 imlListCount; // list of segments - PPCRecImlSegment_t** segmentList; - sint32 segmentListSize; - sint32 segmentListCount; + //PPCRecImlSegment_t** segmentList; + //sint32 segmentListSize; + //sint32 segmentListCount; + std::vector segmentList2; // code generation control bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL 
checks -> Not needed in user mode // register allocator info diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index b96854882..4fb45f50a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -3450,10 +3450,9 @@ void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segment void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { - for(sint32 f=0; fsegmentListCount; f++) + for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[f]; - PPCRecompiler_dumpIMLSegment(imlSegment, f); + PPCRecompiler_dumpIMLSegment(ppcImlGenContext->segmentList2[i], i); debug_printf("\n"); } } @@ -3548,43 +3547,18 @@ PPCRecImlInstruction_t* PPCRecompiler_appendInstruction(PPCRecImlSegment_t* imlS return imlSegment->imlList + index; } -void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count) +PPCRecImlSegment_t* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext) { - if( (ppcImlGenContext->segmentListCount+count) > ppcImlGenContext->segmentListSize ) - { - // allocate space for more segments - ppcImlGenContext->segmentListSize += count; - ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*)); - } - for(sint32 i=(sint32)ppcImlGenContext->segmentListCount-1; i>=index; i--) - { - memcpy(ppcImlGenContext->segmentList+(i+count), ppcImlGenContext->segmentList+i, sizeof(PPCRecImlSegment_t*)); - } - ppcImlGenContext->segmentListCount += count; - for(sint32 i=0; isegmentList+index+i, 0x00, sizeof(PPCRecImlSegment_t*)); - ppcImlGenContext->segmentList[index+i] = (PPCRecImlSegment_t*)malloc(sizeof(PPCRecImlSegment_t)); - memset(ppcImlGenContext->segmentList[index+i], 0x00, sizeof(PPCRecImlSegment_t)); - ppcImlGenContext->segmentList[index + i]->list_prevSegments = std::vector(); - } + PPCRecImlSegment_t* segment = new PPCRecImlSegment_t(); + ppcImlGenContext->segmentList2.emplace_back(segment); + return segment; } -/* - * Allocate and init a new iml instruction segment - */ -PPCRecImlSegment_t* PPCRecompiler_generateImlSegment(ppcImlGenContext_t* ppcImlGenContext) +void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count) { - if( ppcImlGenContext->segmentListCount >= ppcImlGenContext->segmentListSize ) - { - // allocate space for more segments - ppcImlGenContext->segmentListSize *= 2; - ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*)); - } - PPCRecImlSegment_t* ppcRecSegment = new PPCRecImlSegment_t(); - ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount] = ppcRecSegment; - ppcImlGenContext->segmentListCount++; - return ppcRecSegment; + ppcImlGenContext->segmentList2.insert(ppcImlGenContext->segmentList2.begin() + index, count, nullptr); + for (sint32 i = 0; i < count; i++) + ppcImlGenContext->segmentList2[index + i] = new PPCRecImlSegment_t(); } void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) @@ -3594,17 +3568,25 @@ void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) free(ppcImlGenContext->imlList); ppcImlGenContext->imlList = nullptr; } - for(sint32 i=0; 
isegmentListCount; i++) - { - free(ppcImlGenContext->segmentList[i]->imlList); - delete ppcImlGenContext->segmentList[i]; - } - ppcImlGenContext->segmentListCount = 0; - if (ppcImlGenContext->segmentList) + + for (PPCRecImlSegment_t* imlSegment : ppcImlGenContext->segmentList2) { - free(ppcImlGenContext->segmentList); - ppcImlGenContext->segmentList = nullptr; + free(imlSegment->imlList); + delete imlSegment; } + ppcImlGenContext->segmentList2.clear(); + + //for(sint32 i=0; isegmentListCount; i++) + //{ + // free(ppcImlGenContext->segmentList[i]->imlList); + // delete ppcImlGenContext->segmentList[i]; + //} + //ppcImlGenContext->segmentListCount = 0; + //if (ppcImlGenContext->segmentList) + //{ + // free(ppcImlGenContext->segmentList); + // ppcImlGenContext->segmentList = nullptr; + //} } bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml) @@ -4598,9 +4580,8 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // divide iml instructions into segments // each segment is defined by one or more instructions with no branches or jump destinations in between // a branch instruction may only be the very last instruction of a segment - ppcImlGenContext.segmentListCount = 0; - ppcImlGenContext.segmentListSize = 2; - ppcImlGenContext.segmentList = (PPCRecImlSegment_t**)malloc(ppcImlGenContext.segmentListSize*sizeof(PPCRecImlSegment_t*)); + cemu_assert_debug(ppcImlGenContext.segmentList2.empty()); + sint32 segmentStart = 0; sint32 segmentImlIndex = 0; while( segmentImlIndex < ppcImlGenContext.imlListCount ) @@ -4619,7 +4600,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) { // segment ends after current instruction - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); + PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart+1; ppcRecSegment->ppcAddress = 0xFFFFFFFF; @@ -4631,7 +4612,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // segment ends before current instruction if( segmentImlIndex > segmentStart ) { - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); + PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart; ppcRecSegment->ppcAddress = 0xFFFFFFFF; @@ -4643,123 +4624,122 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if( segmentImlIndex != segmentStart ) { // final segment - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); + PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart; ppcRecSegment->ppcAddress = 0xFFFFFFFF; segmentStart = segmentImlIndex; } // move iml instructions into the segments - for(sint32 s=0; sstartOffset; - uint32 imlCount = ppcImlGenContext.segmentList[s]->count; + uint32 imlStartIndex = segIt->startOffset; + uint32 imlCount = segIt->count; if( imlCount > 0 ) { - ppcImlGenContext.segmentList[s]->imlListSize = imlCount + 4; - ppcImlGenContext.segmentList[s]->imlList = 
(PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)*ppcImlGenContext.segmentList[s]->imlListSize); - ppcImlGenContext.segmentList[s]->imlListCount = imlCount; - memcpy(ppcImlGenContext.segmentList[s]->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); + segIt->imlListSize = imlCount + 4; + segIt->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)* segIt->imlListSize); + segIt->imlListCount = imlCount; + memcpy(segIt->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); } else { // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code - ppcImlGenContext.segmentList[s]->imlList = NULL; - ppcImlGenContext.segmentList[s]->imlListSize = 0; - ppcImlGenContext.segmentList[s]->imlListCount = 0; + segIt->imlList = nullptr; + segIt->imlListSize = 0; + segIt->imlListCount = 0; } - ppcImlGenContext.segmentList[s]->startOffset = 9999999; - ppcImlGenContext.segmentList[s]->count = 9999999; + segIt->startOffset = 9999999; + segIt->count = 9999999; } // clear segment-independent iml list free(ppcImlGenContext.imlList); ppcImlGenContext.imlList = NULL; ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) - for(sint32 s=0; simlListCount; i++) + for(sint32 i=0; i< segIt->imlListCount; i++) { - if( ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress == 0 ) + if(segIt->imlList[i].associatedPPCAddress == 0 ) continue; //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) // continue; // jumpmarks and no-op instructions must not affect segment ppc address range - segmentPPCAddrMin = std::min(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMin); - segmentPPCAddrMax = std::max(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMax); + segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); + segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); } if( segmentPPCAddrMin != 0xFFFFFFFF ) { - ppcImlGenContext.segmentList[s]->ppcAddrMin = segmentPPCAddrMin; - ppcImlGenContext.segmentList[s]->ppcAddrMax = segmentPPCAddrMax; + segIt->ppcAddrMin = segmentPPCAddrMin; + segIt->ppcAddrMax = segmentPPCAddrMax; } else { - ppcImlGenContext.segmentList[s]->ppcAddrMin = 0; - ppcImlGenContext.segmentList[s]->ppcAddrMax = 0; + segIt->ppcAddrMin = 0; + segIt->ppcAddrMax = 0; } } // certain instructions can change the segment state // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) // jumpmarks mark the segment as a jump destination (within the same function) - for(sint32 s=0; simlListCount > 0 ) + while (segIt->imlListCount > 0) { - if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER ) + if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) { // mark segment as enterable - if( ppcImlGenContext.segmentList[s]->isEnterable ) + if (segIt->isEnterable) assert_dbg(); // should not happen? 
- ppcImlGenContext.segmentList[s]->isEnterable = true; - ppcImlGenContext.segmentList[s]->enterPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_ppcEnter.ppcAddress; + segIt->isEnterable = true; + segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; // remove ppc_enter instruction - ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0; + segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; + segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + segIt->imlList[0].associatedPPCAddress = 0; } - else if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) + else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) { // mark segment as jump destination - if( ppcImlGenContext.segmentList[s]->isJumpDestination ) + if(segIt->isJumpDestination ) assert_dbg(); // should not happen? - ppcImlGenContext.segmentList[s]->isJumpDestination = true; - ppcImlGenContext.segmentList[s]->jumpDestinationPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_jumpmark.address; + segIt->isJumpDestination = true; + segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; // remove jumpmark instruction - ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0; + segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; + segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + segIt->imlList[0].associatedPPCAddress = 0; } else break; } } // the first segment is always enterable as the recompiled functions entrypoint - ppcImlGenContext.segmentList[0]->isEnterable = true; - ppcImlGenContext.segmentList[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; + ppcImlGenContext.segmentList2[0]->isEnterable = true; + ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; // link segments for further inter-segment optimization PPCRecompilerIML_linkSegments(&ppcImlGenContext); // optimization pass - replace segments with conditional MOVs if possible - for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; - if (imlSegment->nextSegmentBranchNotTaken == NULL || imlSegment->nextSegmentBranchTaken == NULL) + if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) continue; // not a branching segment - PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment); + PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(segIt); if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) continue; - PPCRecImlSegment_t* conditionalSegment = imlSegment->nextSegmentBranchNotTaken; - PPCRecImlSegment_t* finalSegment = imlSegment->nextSegmentBranchTaken; - if(imlSegment->nextSegmentBranchTaken != imlSegment->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) + PPCRecImlSegment_t* conditionalSegment = segIt->nextSegmentBranchNotTaken; + PPCRecImlSegment_t* finalSegment = segIt->nextSegmentBranchTaken; + if (segIt->nextSegmentBranchTaken != 
segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) continue; - if (imlSegment->nextSegmentBranchNotTaken->imlListCount > 4) + if (segIt->nextSegmentBranchNotTaken->imlListCount > 4) continue; - if(conditionalSegment->list_prevSegments.size() != 1) + if (conditionalSegment->list_prevSegments.size() != 1) continue; // the reduced segment must not be the target of any other branch - if(conditionalSegment->isEnterable) + if (conditionalSegment->isEnterable) continue; // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) bool canReduceSegment = true; @@ -4788,16 +4768,16 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext { PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList + f; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(imlSegment), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); + PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); else assert_dbg(); } // update segment links // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment - PPCRecompilerIML_removeLink(imlSegment, conditionalSegment); - PPCRecompilerIML_removeLink(imlSegment, finalSegment); + PPCRecompilerIML_removeLink(segIt, conditionalSegment); + PPCRecompilerIML_removeLink(segIt, finalSegment); PPCRecompilerIML_removeLink(conditionalSegment, finalSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, finalSegment); + PPCRecompilerIml_setLinkBranchNotTaken(segIt, finalSegment); // remove all instructions from conditional segment conditionalSegment->imlListCount = 0; @@ -4805,23 +4785,23 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) { // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() - PPCRecompilerIML_removeLink(imlSegment, finalSegment); + PPCRecompilerIML_removeLink(segIt, finalSegment); if (finalSegment->nextSegmentBranchNotTaken) { PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchNotTaken; PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, tempSegment); + PPCRecompilerIml_setLinkBranchNotTaken(segIt, tempSegment); } if (finalSegment->nextSegmentBranchTaken) { PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchTaken; PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchTaken(imlSegment, tempSegment); + PPCRecompilerIml_setLinkBranchTaken(segIt, tempSegment); } // copy IML instructions for (sint32 f = 0; f < finalSegment->imlListCount; f++) { - memcpy(PPCRecompiler_appendInstruction(imlSegment), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t)); + memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t)); } finalSegment->imlListCount = 0; @@ -4832,33 
+4812,32 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert cycle counter instruction in every segment that has a cycle count greater zero - for(sint32 s=0; sppcAddrMin == 0 ) + if( segIt->ppcAddrMin == 0 ) continue; // count number of PPC instructions in segment - // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC + // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions uint32 lastPPCInstAddr = 0; uint32 ppcCount2 = 0; - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - if (imlSegment->imlList[i].associatedPPCAddress == 0) + if (segIt->imlList[i].associatedPPCAddress == 0) continue; - if (imlSegment->imlList[i].associatedPPCAddress == lastPPCInstAddr) + if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) continue; - lastPPCInstAddr = imlSegment->imlList[i].associatedPPCAddress; + lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; ppcCount2++; } //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions uint32 cycleCount = ppcCount2;// ppcCount / 4; if( cycleCount > 0 ) { - PPCRecompiler_pushBackIMLInstructions(imlSegment, 0, 1); - imlSegment->imlList[0].type = PPCREC_IML_TYPE_MACRO; - imlSegment->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegment->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - imlSegment->imlList[0].op_macro.param = cycleCount; + PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); + segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; + segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + segIt->imlList[0].op_macro.param = cycleCount; } } @@ -4866,10 +4845,10 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. // todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located - for(sint32 s=0; sppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) 
- PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList2[s]; if( imlSegment->imlListCount == 0 ) continue; if (imlSegment->imlList[imlSegment->imlListCount - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlListCount - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) @@ -4891,12 +4870,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); imlSegment = NULL; - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList[s+0]; - PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList[s+1]; - PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList[s+2]; + PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList2[s+0]; + PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList2[s+1]; + PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList2[s+2]; // create entry point segment - PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentListCount, 1); - PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList[ppcImlGenContext.segmentListCount-1]; + PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1]; // relink segments PPCRecompilerIML_relinkInputSegment(imlSegmentP2, imlSegmentP0); PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); @@ -4972,16 +4951,15 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert name store instructions at the end of each segment but before branch instructions - for(sint32 s=0; simlListCount == 0 ) + if(segIt->imlListCount == 0 ) continue; // ignore empty segments // analyze segment for register usage PPCImlOptimizerUsedRegisters_t registersUsed; - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlListCount; i++) { - PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); + PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, segIt->imlList+i, ®istersUsed); //PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, registersUsed.readGPR1); sint32 accessedTempReg[5]; // intermediate FPRs @@ -4997,7 +4975,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]]; if( regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0+32 ) { - imlSegment->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; + segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; } } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp index 45e276641..d14c6e000 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp @@ -1019,13 +1019,12 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // inefficient algorithm for optimizing away excess registers // we simply load, use and store excess registers into other unused registers when we need to // first we remove all name load and store instructions that involve out-of-bounds registers - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = 
ppcImlGenContext->segmentList[s]; sint32 imlIndex = 0; - while( imlIndex < imlSegment->imlListCount ) + while( imlIndex < segIt->imlListCount ) { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+imlIndex; + PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+imlIndex; if( imlInstructionItr->type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr->type == PPCREC_IML_TYPE_FPR_NAME_R ) { if( imlInstructionItr->op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) @@ -1039,16 +1038,15 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte } } // replace registers - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; sint32 imlIndex = 0; - while( imlIndex < imlSegment->imlListCount ) + while( imlIndex < segIt->imlListCount ) { PPCImlOptimizerUsedRegisters_t registersUsed; while( true ) { - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, ®istersUsed); if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) { // get index of register to replace @@ -1091,16 +1089,16 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte bool replacedRegisterIsUsed = true; if( unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0+32) ) { - replacedRegisterIsUsed = imlSegment->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; + replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; } // replace registers that are out of range - PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, fprToReplace, unusedRegisterIndex); + PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, fprToReplace, unusedRegisterIndex); // add load/store name after instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex+1, 2); + PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex+1, 2); // add load/store before current instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex, 2); + PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); // name_unusedRegister = unusedRegister - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndex+0); + PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+(imlIndex+0); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); if( replacedRegisterIsUsed ) { @@ -1113,7 +1111,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte } else imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; - imlInstructionItr = imlSegment->imlList+(imlIndex+1); + imlInstructionItr = segIt->imlList+(imlIndex+1); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -1122,7 +1120,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.copyWidth = 32; imlInstructionItr->op_r_name.flags = 0; // name_gprToReplace = unusedRegister - imlInstructionItr = imlSegment->imlList+(imlIndex+3); + 
imlInstructionItr = segIt->imlList+(imlIndex+3); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -1131,7 +1129,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.copyWidth = 32; imlInstructionItr->op_r_name.flags = 0; // unusedRegister = name_unusedRegister - imlInstructionItr = imlSegment->imlList+(imlIndex+4); + imlInstructionItr = segIt->imlList+(imlIndex+4); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); if( replacedRegisterIsUsed ) { @@ -1223,7 +1221,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon ppcRecManageRegisters_t rCtx = { 0 }; for (sint32 i = 0; i < 64; i++) rCtx.ppcRegToMapping[i] = -1; - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; sint32 idx = 0; sint32 currentUseIndex = 0; PPCImlOptimizerUsedRegisters_t registersUsed; @@ -1374,7 +1372,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) return false; @@ -1530,9 +1528,9 @@ uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, P } else if (imlSegment->nextSegmentIsUncertain) { - if (ppcImlGenContext->segmentListCount >= 5) + if (ppcImlGenContext->segmentList2.size() >= 5) { - return 7; // for more complex functions we assume that CR is not passed on + return 7; // for more complex functions we assume that CR is not passed on (hack) } } return currentOverwriteMask; @@ -1568,35 +1566,33 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PP void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) { - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) { if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) { uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written - imlSegment->crBitsRead |= (crBitFlag); + segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + segIt->crBitsRead |= (crBitFlag); } } else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written - imlSegment->crBitsRead |= (crBitFlag); + segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not 
already been written + segIt->crBitsRead |= (crBitFlag); } else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) { - imlSegment->crBitsRead |= 0xFFFFFFFF; + segIt->crBitsRead |= 0xFFFFFFFF; } else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) { - imlSegment->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) { @@ -1604,7 +1600,7 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext imlInstruction->operation == PPCREC_IML_OP_CR_SET) { uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten); + segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); } else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC || @@ -1612,38 +1608,37 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) { uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten); + segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); crBitFlag = 1 << (imlInstruction->op_cr.crA); - imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead); + segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); crBitFlag = 1 << (imlInstruction->op_cr.crB); - imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead); + segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); } else cemu_assert_unimplemented(); } else if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) { - imlSegment->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4)); + segIt->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4)); } else if( (imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) { // overwrites CR0 - imlSegment->crBitsWritten |= (0xF<<0); + segIt->crBitsWritten |= (0xF<<0); } } } // flag instructions that write to CR where we can ignore individual CR bits - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) { uint32 crBitFlags = 0xF<<((uint32)imlInstruction->crRegister*4); - uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment); - uint32 crIgnoreMask = crOverwriteMask & ~imlSegment->crBitsRead; + uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); + uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead; imlInstruction->crIgnoreMask = crIgnoreMask; } } @@ -1805,20 +1800,18 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI */ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* 
ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); } else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); } } } @@ -1891,16 +1884,14 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp */ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian ) { - PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); + PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); } } } @@ -1940,12 +1931,11 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 */ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD || imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { if(imlInstruction->op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 && @@ -2167,9 +2157,8 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext) { // check if this segment has a conditional branch - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* 
imlSegment = ppcImlGenContext->segmentList[s]; - _reorderConditionModifyInstructions(imlSegment); + _reorderConditionModifyInstructions(segIt); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp index 88d387e6d..3158303a1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp @@ -628,21 +628,20 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) { // start with frequently executed segments first sint32 maxLoopDepth = 0; - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - maxLoopDepth = std::max(maxLoopDepth, ppcImlGenContext->segmentList[i]->loopDepth); + maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth); } while (true) { bool done = false; for (sint32 d = maxLoopDepth; d >= 0; d--) { - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i]; - if (imlSegment->loopDepth != d) + if (segIt->loopDepth != d) continue; - done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, imlSegment); + done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, segIt); if (done == false) break; } @@ -932,9 +931,9 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment); } } @@ -947,10 +946,10 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen { // insert empty segments after every non-taken branch if the linked segment has more than one input // this gives the register allocator more room to create efficient spill code - sint32 segmentIndex = 0; - while (segmentIndex < ppcImlGenContext->segmentListCount) + size_t segmentIndex = 0; + while (segmentIndex < ppcImlGenContext->segmentList2.size()) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; if (imlSegment->nextSegmentIsUncertain) { segmentIndex++; @@ -972,8 +971,8 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen continue; } PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1); - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList[segmentIndex + 0]; - PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList[segmentIndex + 1]; + PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0]; + PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1]; PPCRecImlSegment_t* nextSegment = imlSegment->nextSegmentBranchNotTaken; PPCRecompilerIML_removeLink(imlSegmentP0, nextSegment); PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP1, nextSegment); @@ -981,14 +980,14 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen segmentIndex++; } // detect loops - for (sint32 s = 0; s 
< ppcImlGenContext->segmentListCount; s++) + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; imlSegment->momentaryIndex = s; } - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment); } } @@ -1009,4 +1008,411 @@ void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext) PPCRecRA_generateMoveInstructions(ppcImlGenContext); PPCRecRA_deleteAllRanges(ppcImlGenContext); +} + + +bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR) +{ + return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX); +} + +void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + { + imlSegment->raDistances.reg[i].usageStart = INT_MAX; + imlSegment->raDistances.reg[i].usageEnd = INT_MIN; + } + // scan instructions for usage range + sint32 index = 0; + PPCImlOptimizerUsedRegisters_t gprTracking; + while (index < imlSegment->imlListCount) + { + // end loop at suffix instruction + if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) + break; + // get accessed GPRs + PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); + for (sint32 t = 0; t < 4; t++) + { + sint32 virtualRegister = gprTracking.gpr[t]; + if (virtualRegister < 0) + continue; + cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR); + imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction + imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction + } + // next instruction + index++; + } +} + +void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +{ + // for each register calculate min/max index of usage range within each segment + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + { + PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, segIt); + } +} + +raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range) +{ + if (imlSegment->raDistances.isProcessed[vGPR]) + { + // return already existing segment + return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; + } + imlSegment->raDistances.isProcessed[vGPR] = true; + if (_isRangeDefined(imlSegment, vGPR) == false) + return nullptr; + // create subrange + cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); + raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd); + // traverse forward + if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + { + if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, 
imlSegment->nextSegmentBranchTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); + } + if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); + } + } + // traverse backward + if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + { + for (auto& it : imlSegment->list_prevSegments) + { + if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); + } + } + return subrange; +} + +void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + { + if (_isRangeDefined(imlSegment, i) == false) + continue; + if (imlSegment->raDistances.isProcessed[i]) + continue; + raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]); + PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); + } + // create lookup table of ranges + raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR]; + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + { + vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i]; +#ifdef CEMU_DEBUG_ASSERT + if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr) + assert_dbg(); +#endif + } + // parse instructions and convert to locations + sint32 index = 0; + PPCImlOptimizerUsedRegisters_t gprTracking; + while (index < imlSegment->imlListCount) + { + // end loop at suffix instruction + if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) + break; + // get accessed GPRs + PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); + // handle accessed GPR + for (sint32 t = 0; t < 4; t++) + { + sint32 virtualRegister = gprTracking.gpr[t]; + if (virtualRegister < 0) + continue; + bool isWrite = (t == 3); + // add location + PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); +#ifdef CEMU_DEBUG_ASSERT + if (index < vGPR2Subrange[virtualRegister]->start.index) + assert_dbg(); + if (index + 1 > vGPR2Subrange[virtualRegister]->end.index) + assert_dbg(); +#endif + } + // next instruction + index++; + } +} + +void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) +{ + if (_isRangeDefined(imlSegment, vGPR) == false) + { + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; + imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; + return; + } + imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; +} + +void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) +{ + if (_isRangeDefined(imlSegment, vGPR) == false) + { + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; + imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START; + } + else + { + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; + } + // propagate backwards + for (auto& it : imlSegment->list_prevSegments) + { + 
PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR); + } +} + +void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth) +{ +#ifdef CEMU_DEBUG_ASSERT + if (routeDepth < 2) + assert_dbg(); +#endif + // extend starting range to end of segment + PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR); + // extend all the connecting segments in both directions + for (sint32 i = 1; i < (routeDepth - 1); i++) + { + PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR); + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR); + } + // extend the final segment towards the beginning + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth - 1], vGPR); +} + +void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth) +{ + if (routeDepth >= 64) + { + forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress); + return; + } + route[routeDepth] = currentSegment; + if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) + { + // measure distance to end of segment + distanceLeft -= currentSegment->imlListCount; + if (distanceLeft > 0) + { + if (currentSegment->nextSegmentBranchNotTaken) + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1); + if (currentSegment->nextSegmentBranchTaken) + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1); + } + return; + } + else + { + // measure distance to range + if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) + { + if (distanceLeft < currentSegment->imlListCount) + return; // range too far away + } + else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) + return; // out of range + // found close range -> connect ranges + _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1); + } +} + +void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR) +{ +#ifdef CEMU_DEBUG_ASSERT + if (currentSegment->raDistances.reg[vGPR].usageEnd < 0) + assert_dbg(); +#endif + // count instructions to end of initial segment + if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START) + assert_dbg(); + sint32 instructionsUntilEndOfSeg; + if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + instructionsUntilEndOfSeg = 0; + else + instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd; + +#ifdef CEMU_DEBUG_ASSERT + if (instructionsUntilEndOfSeg < 0) + assert_dbg(); +#endif + sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; + if (remainingScanDist <= 0) + return; // can't reach end + + // also dont forget: Extending is easier if we allow 'non symmetric' branches. E.g. 
register range one enters one branch + PPCRecImlSegment_t* route[64]; + route[0] = currentSegment; + if (currentSegment->nextSegmentBranchNotTaken) + { + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1); + } + if (currentSegment->nextSegmentBranchTaken) + { + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1); + } +} + +void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + { + if (imlSegment->raDistances.reg[i].usageStart == INT_MAX) + continue; // not used + // check and extend if possible + PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i); + } +#ifdef CEMU_DEBUG_ASSERT + if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) + assert_dbg(); + if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain) + assert_dbg(); +#endif +} + +void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + std::vector list_segments; + list_segments.reserve(1000); + sint32 index = 0; + imlSegment->raRangeExtendProcessed = true; + list_segments.push_back(imlSegment); + while (index < list_segments.size()) + { + PPCRecImlSegment_t* currentSegment = list_segments[index]; + PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); + // follow flow + if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) + { + currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true; + list_segments.push_back(currentSegment->nextSegmentBranchNotTaken); + } + if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false) + { + currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true; + list_segments.push_back(currentSegment->nextSegmentBranchTaken); + } + index++; + } +} + +void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) +{ + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + if (imlSegment->list_prevSegments.empty()) + { + if (imlSegment->raRangeExtendProcessed) + assert_dbg(); // should not happen + PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment); + } + } +} + +void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) +{ + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + auto localLoopDepth = imlSegment->loopDepth; + if (localLoopDepth <= 0) + continue; // not inside a loop + // look for loop exit + bool hasLoopExit = false; + if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth) + { + hasLoopExit = true; + } + if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth) + { + hasLoopExit = true; + } + if (hasLoopExit == false) + continue; + + // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) + for (sint32 i = 0; i < 
PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + { + if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) + continue; // range not set or does not reach end of segment + if (imlSegment->nextSegmentBranchTaken) + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i); + if (imlSegment->nextSegmentBranchNotTaken) + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i); + } + } +} + +void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +{ + // merge close ranges + PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); + // extra pass to move register stores out of loops + PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext); + // calculate liveness ranges + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); + } +} + +void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) +{ + bool isRead = false; + bool isWritten = false; + bool isOverwritten = false; + for (auto& location : subrange->list_locations) + { + if (location.isRead) + { + isRead = true; + } + if (location.isWrite) + { + if (isRead == false) + isOverwritten = true; + isWritten = true; + } + } + subrange->_noLoad = isOverwritten; + subrange->hasStore = isWritten; + + if (subrange->start.index == RA_INTER_RANGE_START) + subrange->_noLoad = true; +} + +void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext) +{ + // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore + // first do a per-subrange pass + for (auto& range : ppcImlGenContext->raInfo.list_ranges) + { + for (auto& subrange : range->list_subranges) + { + PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); + } + } + // then do a second pass where we scan along subrange flow + for (auto& range : ppcImlGenContext->raInfo.list_ranges) + { + for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm + { + _analyzeRangeDataFlow(subrange); + } + } } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp deleted file mode 100644 index abb47e926..000000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp +++ /dev/null @@ -1,414 +0,0 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" -#include - -bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX); -} - -void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - imlSegment->raDistances.reg[i].usageStart = INT_MAX; - imlSegment->raDistances.reg[i].usageEnd = INT_MIN; - } - // scan instructions for usage range - sint32 index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) - { - // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) - break; - // get 
accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR); - imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index+1); // index after instruction - } - // next instruction - index++; - } -} - -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // for each register calculate min/max index of usage range within each segment - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, ppcImlGenContext->segmentList[s]); - } -} - -raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range) -{ - if (imlSegment->raDistances.isProcessed[vGPR]) - { - // return already existing segment - return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; - } - imlSegment->raDistances.isProcessed[vGPR] = true; - if (_isRangeDefined(imlSegment, vGPR) == false) - return nullptr; - // create subrange - cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); - raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd); - // traverse forward - if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - { - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); - } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); - } - } - // traverse backward - if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - for (auto& it : imlSegment->list_prevSegments) - { - if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); - } - } - // return subrange - return subrange; -} - -void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - if( _isRangeDefined(imlSegment, i) == false ) - continue; - if( imlSegment->raDistances.isProcessed[i]) - continue; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]); - PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); - } - // create lookup table of ranges - raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR]; - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; 
i++) - { - vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i]; -#ifdef CEMU_DEBUG_ASSERT - if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr) - assert_dbg(); -#endif - } - // parse instructions and convert to locations - sint32 index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) - { - // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) - break; - // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); - // handle accessed GPR - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - bool isWrite = (t == 3); - // add location - PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); -#ifdef CEMU_DEBUG_ASSERT - if (index < vGPR2Subrange[virtualRegister]->start.index) - assert_dbg(); - if (index+1 > vGPR2Subrange[virtualRegister]->end.index) - assert_dbg(); -#endif - } - // next instruction - index++; - } -} - -void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - if (_isRangeDefined(imlSegment, vGPR) == false) - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; - return; - } - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; -} - -void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - if (_isRangeDefined(imlSegment, vGPR) == false) - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START; - } - else - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - } - // propagate backwards - for (auto& it : imlSegment->list_prevSegments) - { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR); - } -} - -void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth) -{ -#ifdef CEMU_DEBUG_ASSERT - if (routeDepth < 2) - assert_dbg(); -#endif - // extend starting range to end of segment - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR); - // extend all the connecting segments in both directions - for (sint32 i = 1; i < (routeDepth - 1); i++) - { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR); - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR); - } - // extend the final segment towards the beginning - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth-1], vGPR); -} - -void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth) -{ - if (routeDepth >= 64) - { - cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded for function 0x{:08x}", ppcImlGenContext->functionRef->ppcAddress); - return; - } - route[routeDepth] = currentSegment; - if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) - { - // measure distance to end of segment - distanceLeft -= currentSegment->imlListCount; - if (distanceLeft > 0) - { - if (currentSegment->nextSegmentBranchNotTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, 
currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1); - if (currentSegment->nextSegmentBranchTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1); - } - return; - } - else - { - // measure distance to range - if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) - { - if (distanceLeft < currentSegment->imlListCount) - return; // range too far away - } - else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) - return; // out of range - // found close range -> connect ranges - _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1); - } -} - -void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR) -{ -#ifdef CEMU_DEBUG_ASSERT - if (currentSegment->raDistances.reg[vGPR].usageEnd < 0) - assert_dbg(); -#endif - // count instructions to end of initial segment - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START) - assert_dbg(); - sint32 instructionsUntilEndOfSeg; - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - instructionsUntilEndOfSeg = 0; - else - instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd; - -#ifdef CEMU_DEBUG_ASSERT - if (instructionsUntilEndOfSeg < 0) - assert_dbg(); -#endif - sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; - if (remainingScanDist <= 0) - return; // can't reach end - - // also dont forget: Extending is easier if we allow 'non symetric' branches. E.g. register range one enters one branch - PPCRecImlSegment_t* route[64]; - route[0] = currentSegment; - if (currentSegment->nextSegmentBranchNotTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1); - } - if (currentSegment->nextSegmentBranchTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1); - } -} - -void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries - { - if(imlSegment->raDistances.reg[i].usageStart == INT_MAX) - continue; // not used - // check and extend if possible - PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i); - } -#ifdef CEMU_DEBUG_ASSERT - if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) - assert_dbg(); - if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain) - assert_dbg(); -#endif -} - -void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - std::vector list_segments; - list_segments.reserve(1000); - sint32 index = 0; - imlSegment->raRangeExtendProcessed = true; - list_segments.push_back(imlSegment); - while (index < list_segments.size()) - { - PPCRecImlSegment_t* currentSegment = list_segments[index]; - PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); - // follow flow - if (currentSegment->nextSegmentBranchNotTaken && 
currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) - { - currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true; - list_segments.push_back(currentSegment->nextSegmentBranchNotTaken); - } - if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false) - { - currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true; - list_segments.push_back(currentSegment->nextSegmentBranchTaken); - } - index++; - } -} - -void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - if (imlSegment->list_prevSegments.empty()) - { - if (imlSegment->raRangeExtendProcessed) - assert_dbg(); // should not happen - PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment); - } - } -} - -void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - auto localLoopDepth = imlSegment->loopDepth; - if( localLoopDepth <= 0 ) - continue; // not inside a loop - // look for loop exit - bool hasLoopExit = false; - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth) - { - hasLoopExit = true; - } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth) - { - hasLoopExit = true; - } - if(hasLoopExit == false) - continue; - - // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries - { - if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) - continue; // range not set or does not reach end of segment - if(imlSegment->nextSegmentBranchTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i); - if(imlSegment->nextSegmentBranchNotTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i); - } - } -} - -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // merge close ranges - PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); - // extra pass to move register stores out of loops - PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext); - // calculate liveness ranges - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); - } -} - -void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) -{ - bool isRead = false; - bool isWritten = false; - bool isOverwritten = false; - for (auto& location : subrange->list_locations) - { - if (location.isRead) - { - isRead = true; - } - if (location.isWrite) - { - if (isRead == false) - isOverwritten = true; - isWritten = true; - } - } - subrange->_noLoad = isOverwritten; - subrange->hasStore = isWritten; - - if (subrange->start.index == RA_INTER_RANGE_START) - subrange->_noLoad = true; -} - -void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange); - -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // this function is 
called after _assignRegisters(), which means that all ranges are already final and wont change anymore - // first do a per-subrange pass - for (auto& range : ppcImlGenContext->raInfo.list_ranges) - { - for (auto& subrange : range->list_subranges) - { - PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); - } - } - // then do a second pass where we scan along subrange flow - for (auto& range : ppcImlGenContext->raInfo.list_ranges) - { - for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm - { - _analyzeRangeDataFlow(subrange); - } - } -} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index fcbe64be9..05fd93e7e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -3,11 +3,11 @@ PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) { - for(sint32 s=0; ssegmentListCount; s++) + for(PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - if( ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset ) + if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) { - return ppcImlGenContext->segmentList[s]; + return segIt; } } debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); @@ -94,17 +94,18 @@ void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPC void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) { - for(sint32 s=0; ssegmentListCount; s++) + size_t segCount = ppcImlGenContext->segmentList2.size(); + for(size_t s=0; ssegmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; - bool isLastSegment = (s+1)>=ppcImlGenContext->segmentListCount; - PPCRecImlSegment_t* nextSegment = isLastSegment?NULL:ppcImlGenContext->segmentList[s+1]; + bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); + PPCRecImlSegment_t* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; // handle empty segment if( imlSegment->imlListCount == 0 ) { if (isLastSegment == false) - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList[s+1]); // continue execution to next segment + PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment else imlSegment->nextSegmentIsUncertain = true; continue; @@ -143,15 +144,15 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext) { - sint32 initialSegmentCount = ppcImlGenContext->segmentListCount; - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) + size_t initialSegmentCount = ppcImlGenContext->segmentList2.size(); + for (size_t i = 0; i < initialSegmentCount; i++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[i]; if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) { // spawn new segment at end - PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1); - PPCRecImlSegment_t* entrySegment = 
ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount-1]; + PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentList2.size(), 1); + PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList2[ppcImlGenContext->segmentList2.size()-1]; entrySegment->isEnterable = true; entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp index a30295b57..b2d934c8f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp @@ -2299,13 +2299,12 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo // generate iml instruction code bool codeGenerationFailed = false; - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - ppcImlGenContext->segmentList[s]->x64Offset = x64GenContext.codeBufferIndex; - for(sint32 i=0; iimlListCount; i++) + segIt->x64Offset = x64GenContext.codeBufferIndex; + for(sint32 i=0; iimlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) { @@ -2352,7 +2351,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) { - if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlSegment, imlInstruction) == false ) + if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false ) { codeGenerationFailed = true; } @@ -2503,11 +2502,11 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo uint32 x64Offset = 0xFFFFFFFF; if (x64GenContext.relocateOffsetTable[i].type == X64_RELOC_LINK_TO_PPC) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - if (ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset) + if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) { - x64Offset = ppcImlGenContext->segmentList[s]->x64Offset; + x64Offset = segIt->x64Offset; break; } } From 0265108e5661e562cb99dc9f257b24a70ec60cfc Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 4 Nov 2022 21:33:03 +0100 Subject: [PATCH 03/64] PPCRec: Use vector for instruction list --- .../HW/Espresso/Recompiler/PPCRecompiler.h | 4 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 2 +- .../Recompiler/PPCRecompilerImlAnalyzer.cpp | 16 +- .../Recompiler/PPCRecompilerImlGen.cpp | 360 +++++++++--------- .../Recompiler/PPCRecompilerImlOptimizer.cpp | 321 ++++++++-------- .../PPCRecompilerImlRegisterAllocator.cpp | 61 ++- .../Recompiler/PPCRecompilerIntermediate.cpp | 12 +- .../Espresso/Recompiler/PPCRecompilerX64.cpp | 4 +- 8 files changed, 379 insertions(+), 401 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 4c07cfaad..7798df67e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -269,9 
+269,7 @@ typedef struct _PPCRecImlSegment_t uint32 x64Offset{}; // x64 code offset of segment start uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly) // list of intermediate instructions in this segment - PPCRecImlInstruction_t* imlList{}; - sint32 imlListSize{}; - sint32 imlListCount{}; + std::vector imlList; // segment link _PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch _PPCRecImlSegment_t* nextSegmentBranchTaken{}; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 86af33b2d..036c448e5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -419,4 +419,4 @@ typedef struct }; }PPCImlOptimizerUsedRegisters_t; -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed); +void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp index 4962d30d1..3ffe5aed1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp @@ -13,9 +13,9 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) if (imlSegment->nextSegmentBranchTaken != imlSegment) return false; // loops using BDNZ are assumed to always be finite - for (sint32 t = 0; t < imlSegment->imlListCount; t++) + for(const PPCRecImlInstruction_t& instIt : imlSegment->imlList) { - if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB && imlSegment->imlList[t].crRegister == 8) + if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB && instIt.crRegister == 8) { return true; } @@ -24,11 +24,11 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) // risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB) // this catches most loops with load-update and store-update instructions, but also those with decrementing counters FixedSizeList list_modifiedRegisters; - for (sint32 t = 0; t < imlSegment->imlListCount; t++) + for (const PPCRecImlInstruction_t& instIt : imlSegment->imlList) { - if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB) ) + if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB) ) { - list_modifiedRegisters.addUnique(imlSegment->imlList[t].op_r_immS32.registerIndex); + list_modifiedRegisters.addUnique(instIt.op_r_immS32.registerIndex); } } if (list_modifiedRegisters.count > 0) @@ -36,11 +36,11 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) // remove all registers from the list that are modified by non-ADD/SUB instructions // todo: We should also cover the case where ADD+SUB on the same register cancel the effect out PPCImlOptimizerUsedRegisters_t registersUsed; - for (sint32 t = 0; t < imlSegment->imlListCount; t++) + for (const 
PPCRecImlInstruction_t& instIt : imlSegment->imlList) { - if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB)) + if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB)) continue; - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + t, ®istersUsed); + PPCRecompiler_checkRegisterUsage(nullptr, &instIt, ®istersUsed); if(registersUsed.writtenNamedReg1 < 0) continue; list_modifiedRegisters.remove(registersUsed.writtenNamedReg1); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 4fb45f50a..e31f196cf 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2990,7 +2990,7 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext char _tempOpcodename[32]; -const char* PPCRecompiler_getOpcodeDebugName(PPCRecImlInstruction_t* iml) +const char* PPCRecompiler_getOpcodeDebugName(const PPCRecImlInstruction_t* iml) { uint32 op = iml->operation; if (op == PPCREC_IML_OP_ASSIGN) @@ -3115,294 +3115,295 @@ void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segment sint32 lineOffsetParameters = 18; - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlList.size(); i++) { + const PPCRecImlInstruction_t& inst = imlSegment->imlList[i]; // don't log NOP instructions unless they have an associated PPC address - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_NO_OP && imlSegment->imlList[i].associatedPPCAddress == MPTR_NULL) + if(inst.type == PPCREC_IML_TYPE_NO_OP && inst.associatedPPCAddress == MPTR_NULL) continue; strOutput.reset(); - strOutput.addFmt("{:08x} ", imlSegment->imlList[i].associatedPPCAddress); - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME || imlSegment->imlList[i].type == PPCREC_IML_TYPE_NAME_R) + strOutput.addFmt("{:08x} ", inst.associatedPPCAddress); + if( inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) { - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME) + if(inst.type == PPCREC_IML_TYPE_R_NAME) strOutput.add("LD_NAME"); else strOutput.add("ST_NAME"); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_name.registerIndex); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_name.registerIndex); - strOutput.addFmt("name_{} (", imlSegment->imlList[i].op_r_name.registerIndex, imlSegment->imlList[i].op_r_name.name); - if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_R0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_R0+999) ) + strOutput.addFmt("name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name); + if( inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0+999) ) { - strOutput.addFmt("r{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_R0); + strOutput.addFmt("r{}", inst.op_r_name.name-PPCREC_NAME_R0); } - else if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_SPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_SPR0+999) ) + else if( inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0+999) ) { - strOutput.addFmt("spr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_SPR0); + strOutput.addFmt("spr{}", inst.op_r_name.name-PPCREC_NAME_SPR0); } 
else strOutput.add("ukn"); strOutput.add(")"); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_R ) + else if( inst.type == PPCREC_IML_TYPE_R_R ) { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList+i)); + strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r.registerResult); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r.registerA, true); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r.registerResult); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r.registerA, true); - if( imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER ) + if( inst.crRegister != PPC_REC_INVALID_REGISTER ) { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); + strOutput.addFmt(" -> CR{}", inst.crRegister); } } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_R_R ) + else if( inst.type == PPCREC_IML_TYPE_R_R_R ) { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList + i)); + strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_r.registerResult); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_r.registerA); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_r.registerB, true); - if( imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER ) + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerResult); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerA); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerB, true); + if( inst.crRegister != PPC_REC_INVALID_REGISTER ) { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); + strOutput.addFmt(" -> CR{}", inst.crRegister); } } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_R_S32) + else if (inst.type == PPCREC_IML_TYPE_R_R_S32) { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList + i)); + strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_s32.registerResult); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_s32.registerA); - PPCRecDebug_addS32Param(strOutput, imlSegment->imlList[i].op_r_r_s32.immS32, true); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_s32.registerResult); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_s32.registerA); + PPCRecDebug_addS32Param(strOutput, inst.op_r_r_s32.immS32, true); - if (imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER) + if (inst.crRegister != PPC_REC_INVALID_REGISTER) { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); + strOutput.addFmt(" -> CR{}", inst.crRegister); } } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_S32) + else if (inst.type == PPCREC_IML_TYPE_R_S32) { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList + i)); + strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_immS32.registerIndex); - 
PPCRecDebug_addS32Param(strOutput, imlSegment->imlList[i].op_r_immS32.immS32, true); + PPCRecDebug_addRegisterParam(strOutput, inst.op_r_immS32.registerIndex); + PPCRecDebug_addS32Param(strOutput, inst.op_r_immS32.immS32, true); - if (imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER) + if (inst.crRegister != PPC_REC_INVALID_REGISTER) { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); + strOutput.addFmt(" -> CR{}", inst.crRegister); } } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK ) + else if( inst.type == PPCREC_IML_TYPE_JUMPMARK ) { - strOutput.addFmt("jm_{:08x}:", imlSegment->imlList[i].op_jumpmark.address); + strOutput.addFmt("jm_{:08x}:", inst.op_jumpmark.address); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_PPC_ENTER ) + else if( inst.type == PPCREC_IML_TYPE_PPC_ENTER ) { - strOutput.addFmt("ppcEnter_{:08x}:", imlSegment->imlList[i].op_ppcEnter.ppcAddress); + strOutput.addFmt("ppcEnter_{:08x}:", inst.op_ppcEnter.ppcAddress); } - else if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD || imlSegment->imlList[i].type == PPCREC_IML_TYPE_STORE || - imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD_INDEXED || imlSegment->imlList[i].type == PPCREC_IML_TYPE_STORE_INDEXED ) + else if(inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || + inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED ) { - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD || imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD_INDEXED) + if(inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) strOutput.add("LD_"); else strOutput.add("ST_"); - if (imlSegment->imlList[i].op_storeLoad.flags2.signExtend) + if (inst.op_storeLoad.flags2.signExtend) strOutput.add("S"); else strOutput.add("U"); - strOutput.addFmt("{}", imlSegment->imlList[i].op_storeLoad.copyWidth); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_storeLoad.registerData); + PPCRecDebug_addRegisterParam(strOutput, inst.op_storeLoad.registerData); - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD_INDEXED || imlSegment->imlList[i].type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.registerMem2); + if(inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); else - strOutput.addFmt("[t{}+{}]", imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.immS32); + strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_MEM2MEM) + else if (inst.type == PPCREC_IML_TYPE_MEM2MEM) { - strOutput.addFmt("{} [t{}+{}] = [t{}+{}]", imlSegment->imlList[i].op_mem2mem.copyWidth, imlSegment->imlList[i].op_mem2mem.dst.registerMem, imlSegment->imlList[i].op_mem2mem.dst.immS32, imlSegment->imlList[i].op_mem2mem.src.registerMem, imlSegment->imlList[i].op_mem2mem.src.immS32); + strOutput.addFmt("{} [t{}+{}] = [t{}+{}]", inst.op_mem2mem.copyWidth, inst.op_mem2mem.dst.registerMem, inst.op_mem2mem.dst.immS32, inst.op_mem2mem.src.registerMem, inst.op_mem2mem.src.immS32); } - else if( imlSegment->imlList[i].type == 
PPCREC_IML_TYPE_CJUMP ) + else if( inst.type == PPCREC_IML_TYPE_CJUMP ) { - if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) + if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) strOutput.add("JE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) strOutput.add("JNE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) strOutput.add("JG"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) strOutput.add("JGE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) strOutput.add("JL"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) strOutput.add("JLE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE) + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE) strOutput.add("JALW"); // jump always else cemu_assert_unimplemented(); - strOutput.addFmt(" jm_{:08x} (cr{})", imlSegment->imlList[i].op_conditionalJump.jumpmarkAddress, imlSegment->imlList[i].crRegister); + strOutput.addFmt(" jm_{:08x} (cr{})", inst.op_conditionalJump.jumpmarkAddress, inst.crRegister); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) + else if( inst.type == PPCREC_IML_TYPE_NO_OP ) { strOutput.add("NOP"); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_MACRO ) + else if( inst.type == PPCREC_IML_TYPE_MACRO ) { - if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BLR ) + if( inst.operation == PPCREC_IML_MACRO_BLR ) { - strOutput.addFmt("MACRO BLR 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); + strOutput.addFmt("MACRO BLR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BLRL ) + else if( inst.operation == PPCREC_IML_MACRO_BLRL ) { - strOutput.addFmt("MACRO BLRL 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); + strOutput.addFmt("MACRO BLRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BCTR ) + else if( inst.operation == PPCREC_IML_MACRO_BCTR ) { - strOutput.addFmt("MACRO BCTR 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); + strOutput.addFmt("MACRO BCTR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BCTRL ) + else if( inst.operation == PPCREC_IML_MACRO_BCTRL ) { - strOutput.addFmt("MACRO BCTRL 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); + strOutput.addFmt("MACRO BCTRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BL ) + else if( 
inst.operation == PPCREC_IML_MACRO_BL ) { - strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2, (sint32)imlSegment->imlList[i].op_macro.paramU16); + strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_B_FAR ) + else if( inst.operation == PPCREC_IML_MACRO_B_FAR ) { - strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2, (sint32)imlSegment->imlList[i].op_macro.paramU16); + strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_LEAVE ) + else if( inst.operation == PPCREC_IML_MACRO_LEAVE ) { - strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", imlSegment->imlList[i].op_macro.param); + strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_HLE ) + else if( inst.operation == PPCREC_IML_MACRO_HLE ) { - strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2); + strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_MFTB ) + else if( inst.operation == PPCREC_IML_MACRO_MFTB ) { - strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2); + strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_COUNT_CYCLES ) + else if( inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES ) { - strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", imlSegment->imlList[i].op_macro.param); + strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); } else { - strOutput.addFmt("MACRO ukn operation {}", imlSegment->imlList[i].operation); + strOutput.addFmt("MACRO ukn operation {}", inst.operation); } } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_NAME ) + else if( inst.type == PPCREC_IML_TYPE_FPR_R_NAME ) { - strOutput.addFmt("fpr_t{} = name_{} (", imlSegment->imlList[i].op_r_name.registerIndex, imlSegment->imlList[i].op_r_name.name); - if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_FPR0+999) ) + strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name); + if( inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0+999) ) { - strOutput.addFmt("fpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_FPR0); + strOutput.addFmt("fpr{}", inst.op_r_name.name-PPCREC_NAME_FPR0); } - else if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) + else if( inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) { - strOutput.addFmt("tempFpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); + strOutput.addFmt("tempFpr{}", 
inst.op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); } else strOutput.add("ukn"); strOutput.add(")"); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_NAME_R ) + else if( inst.type == PPCREC_IML_TYPE_FPR_NAME_R ) { - strOutput.addFmt("name_{} (", imlSegment->imlList[i].op_r_name.name); - if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_FPR0+999) ) + strOutput.addFmt("name_{} (", inst.op_r_name.name); + if( inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0+999) ) { - strOutput.addFmt("fpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_FPR0); + strOutput.addFmt("fpr{}", inst.op_r_name.name-PPCREC_NAME_FPR0); } - else if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) + else if( inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) { - strOutput.addFmt("tempFpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); + strOutput.addFmt("tempFpr{}", inst.op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); } else strOutput.add("ukn"); - strOutput.addFmt(") = fpr_t{}", imlSegment->imlList[i].op_r_name.registerIndex); + strOutput.addFmt(") = fpr_t{}", inst.op_r_name.registerIndex); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_LOAD ) + else if( inst.type == PPCREC_IML_TYPE_FPR_LOAD ) { - strOutput.addFmt("fpr_t{} = ", imlSegment->imlList[i].op_storeLoad.registerData); - if( imlSegment->imlList[i].op_storeLoad.flags2.signExtend ) + strOutput.addFmt("fpr_t{} = ", inst.op_storeLoad.registerData); + if( inst.op_storeLoad.flags2.signExtend ) strOutput.add("S"); else strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}] mode {}", imlSegment->imlList[i].op_storeLoad.copyWidth / 8, imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.immS32, imlSegment->imlList[i].op_storeLoad.mode); - if (imlSegment->imlList[i].op_storeLoad.flags2.notExpanded) + strOutput.addFmt("{} [t{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32, inst.op_storeLoad.mode); + if (inst.op_storeLoad.flags2.notExpanded) { strOutput.addFmt(" "); } } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_STORE ) + else if( inst.type == PPCREC_IML_TYPE_FPR_STORE ) { - if( imlSegment->imlList[i].op_storeLoad.flags2.signExtend ) + if( inst.op_storeLoad.flags2.signExtend ) strOutput.add("S"); else strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}]", imlSegment->imlList[i].op_storeLoad.copyWidth/8, imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.immS32); - strOutput.addFmt("= fpr_t{} mode {}\n", imlSegment->imlList[i].op_storeLoad.registerData, imlSegment->imlList[i].op_storeLoad.mode); + strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth/8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); + strOutput.addFmt("= fpr_t{} mode {}\n", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_R ) + else if( inst.type == PPCREC_IML_TYPE_FPR_R_R ) { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&imlSegment->imlList[i])); - strOutput.addFmt("fpr{:02d}, fpr{:02d}", imlSegment->imlList[i].op_fpr_r_r.registerResult, imlSegment->imlList[i].op_fpr_r_r.registerOperand); + strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst)); 
+ strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_R_R_R ) + else if( inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R ) { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&imlSegment->imlList[i])); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", imlSegment->imlList[i].op_fpr_r_r_r_r.registerResult, imlSegment->imlList[i].op_fpr_r_r_r_r.registerOperandA, imlSegment->imlList[i].op_fpr_r_r_r_r.registerOperandB, imlSegment->imlList[i].op_fpr_r_r_r_r.registerOperandC); + strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst)); + strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_R_R ) + else if( inst.type == PPCREC_IML_TYPE_FPR_R_R_R ) { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&imlSegment->imlList[i])); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", imlSegment->imlList[i].op_fpr_r_r_r.registerResult, imlSegment->imlList[i].op_fpr_r_r_r.registerOperandA, imlSegment->imlList[i].op_fpr_r_r_r.registerOperandB); + strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst)); + strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", imlSegment->imlList[i].op_conditionalJump.jumpmarkAddress); + strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress); } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - strOutput.addFmt("t{} ", imlSegment->imlList[i].op_conditional_r_s32.registerIndex); + strOutput.addFmt("t{} ", inst.op_conditional_r_s32.registerIndex); bool displayAsHex = false; - if (imlSegment->imlList[i].operation == PPCREC_IML_OP_ASSIGN) + if (inst.operation == PPCREC_IML_OP_ASSIGN) { displayAsHex = true; strOutput.add("="); } else - strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", imlSegment->imlList[i].operation); + strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); if (displayAsHex) - strOutput.addFmt(" 0x{:x}", imlSegment->imlList[i].op_conditional_r_s32.immS32); + strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); else - strOutput.addFmt(" {}", imlSegment->imlList[i].op_conditional_r_s32.immS32); + strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); strOutput.add(" (conditional)"); - if (imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER) + if (inst.crRegister != PPC_REC_INVALID_REGISTER) { - strOutput.addFmt(" -> and update CR{}", imlSegment->imlList[i].crRegister); + strOutput.addFmt(" -> and update CR{}", inst.crRegister); } } else { - strOutput.addFmt("Unknown iml type {}", imlSegment->imlList[i].type); + strOutput.addFmt("Unknown iml type {}", inst.type); } debug_printf("%s", strOutput.c_str()); if (printLivenessRangeInfo) @@ -3484,25 +3485,19 @@ void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoi */ void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, 
sint32 index, sint32 shiftBackCount) { - cemu_assert(index >= 0 && index <= imlSegment->imlListCount); + cemu_assert_debug(index >= 0 && index <= imlSegment->imlList.size()); + + imlSegment->imlList.insert(imlSegment->imlList.begin() + index, shiftBackCount, {}); + + memset(imlSegment->imlList.data() + index, 0, sizeof(PPCRecImlInstruction_t) * shiftBackCount); - if (imlSegment->imlListCount + shiftBackCount > imlSegment->imlListSize) - { - sint32 newSize = imlSegment->imlListCount + shiftBackCount + std::max(2, imlSegment->imlListSize/2); - imlSegment->imlList = (PPCRecImlInstruction_t*)realloc(imlSegment->imlList, sizeof(PPCRecImlInstruction_t)*newSize); - imlSegment->imlListSize = newSize; - } - for (sint32 i = (sint32)imlSegment->imlListCount - 1; i >= index; i--) - { - memcpy(imlSegment->imlList + (i + shiftBackCount), imlSegment->imlList + i, sizeof(PPCRecImlInstruction_t)); - } // fill empty space with NOP instructions for (sint32 i = 0; i < shiftBackCount; i++) { imlSegment->imlList[index + i].type = PPCREC_IML_TYPE_NONE; } - imlSegment->imlListCount += shiftBackCount; + // update position of segment points if (imlSegment->segmentPointList) { ppcRecompilerSegmentPoint_t* segmentPoint = imlSegment->segmentPointList; @@ -3519,32 +3514,18 @@ void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint3 } } -/* -* Insert and return new instruction at index -* Warning: Can invalidate any previous instruction structs from the same segment -*/ PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index) { PPCRecompiler_pushBackIMLInstructions(imlSegment, index, 1); - return imlSegment->imlList + index; + return imlSegment->imlList.data() + index; } -/* -* Append and return new instruction at the end of the segment -* Warning: Can invalidate any previous instruction structs from the same segment -*/ PPCRecImlInstruction_t* PPCRecompiler_appendInstruction(PPCRecImlSegment_t* imlSegment) { - sint32 index = imlSegment->imlListCount; - if (index >= imlSegment->imlListSize) - { - sint32 newSize = index+1; - imlSegment->imlList = (PPCRecImlInstruction_t*)realloc(imlSegment->imlList, sizeof(PPCRecImlInstruction_t)*newSize); - imlSegment->imlListSize = newSize; - } - imlSegment->imlListCount++; - memset(imlSegment->imlList + index, 0, sizeof(PPCRecImlInstruction_t)); - return imlSegment->imlList + index; + size_t index = imlSegment->imlList.size(); + imlSegment->imlList.emplace_back(); + memset(imlSegment->imlList.data() + index, 0, sizeof(PPCRecImlInstruction_t)); + return imlSegment->imlList.data() + index; } PPCRecImlSegment_t* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext) @@ -3571,7 +3552,7 @@ void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) for (PPCRecImlSegment_t* imlSegment : ppcImlGenContext->segmentList2) { - free(imlSegment->imlList); + //free(imlSegment->imlList); delete imlSegment; } ppcImlGenContext->segmentList2.clear(); @@ -4637,31 +4618,37 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext uint32 imlCount = segIt->count; if( imlCount > 0 ) { - segIt->imlListSize = imlCount + 4; - segIt->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)* segIt->imlListSize); - segIt->imlListCount = imlCount; - memcpy(segIt->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); + //segIt->imlListSize = imlCount + 4; + //segIt->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t) * 
segIt->imlListSize); + //segIt->imlListCount = imlCount; + //memcpy(segIt->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); + cemu_assert_debug(segIt->imlList.empty()); + //segIt->imlList.resize(imlCount); + //segIt->imlList.insert(segIt->imlList.begin() + imlStartIndex, ); + segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); + } else { // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code - segIt->imlList = nullptr; - segIt->imlListSize = 0; - segIt->imlListCount = 0; + cemu_assert_debug(segIt->imlList.empty()); + //segIt->imlList = nullptr; + //segIt->imlListSize = 0; + //segIt->imlListCount = 0; } segIt->startOffset = 9999999; segIt->count = 9999999; } // clear segment-independent iml list free(ppcImlGenContext.imlList); - ppcImlGenContext.imlList = NULL; + ppcImlGenContext.imlList = nullptr; ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) { uint32 segmentPPCAddrMin = 0xFFFFFFFF; uint32 segmentPPCAddrMax = 0x00000000; - for(sint32 i=0; i< segIt->imlListCount; i++) + for(sint32 i=0; i< segIt->imlList.size(); i++) { if(segIt->imlList[i].associatedPPCAddress == 0 ) continue; @@ -4686,7 +4673,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // jumpmarks mark the segment as a jump destination (within the same function) for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) { - while (segIt->imlListCount > 0) + while (segIt->imlList.size() > 0) { if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) { @@ -4735,7 +4722,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecImlSegment_t* finalSegment = segIt->nextSegmentBranchTaken; if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) continue; - if (segIt->nextSegmentBranchNotTaken->imlListCount > 4) + if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) continue; if (conditionalSegment->list_prevSegments.size() != 1) continue; // the reduced segment must not be the target of any other branch @@ -4743,9 +4730,9 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext continue; // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) bool canReduceSegment = true; - for (sint32 f = 0; f < conditionalSegment->imlListCount; f++) + for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) { - PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList+f; + PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList.data() + f; if( imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) continue; // todo: Register to register copy @@ -4764,9 +4751,9 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerImlGen_generateNewInstruction_noOp(&ppcImlGenContext, lastInstruction); // append conditional moves based on branch condition - for (sint32 f = 0; f < conditionalSegment->imlListCount; f++) + for (sint32 f = 0; f < 
conditionalSegment->imlList.size(); f++) { - PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList + f; + PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList.data() + f; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); else @@ -4779,7 +4766,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIML_removeLink(conditionalSegment, finalSegment); PPCRecompilerIml_setLinkBranchNotTaken(segIt, finalSegment); // remove all instructions from conditional segment - conditionalSegment->imlListCount = 0; + conditionalSegment->imlList.clear(); // if possible, merge imlSegment with finalSegment if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) @@ -4799,11 +4786,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIml_setLinkBranchTaken(segIt, tempSegment); } // copy IML instructions - for (sint32 f = 0; f < finalSegment->imlListCount; f++) + cemu_assert_debug(segIt != finalSegment); + for (sint32 f = 0; f < finalSegment->imlList.size(); f++) { - memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t)); + memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(PPCRecImlInstruction_t)); } - finalSegment->imlListCount = 0; + finalSegment->imlList.clear(); //PPCRecompiler_dumpIML(ppcRecFunc, &ppcImlGenContext); } @@ -4820,7 +4808,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions uint32 lastPPCInstAddr = 0; uint32 ppcCount2 = 0; - for (sint32 i = 0; i < segIt->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlList.size(); i++) { if (segIt->imlList[i].associatedPPCAddress == 0) continue; @@ -4849,11 +4837,11 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext { // todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) 
PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList2[s]; - if( imlSegment->imlListCount == 0 ) + if( imlSegment->imlList.empty() ) continue; - if (imlSegment->imlList[imlSegment->imlListCount - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlListCount - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) + if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) continue; - if (imlSegment->imlList[imlSegment->imlListCount - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlListCount - 1].op_conditionalJump.jumpAccordingToSegment) + if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) continue; // exclude non-infinite tight loops if (PPCRecompilerImlAnalyzer_isTightFiniteLoop(imlSegment)) @@ -4929,7 +4917,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; // jump instruction for PEntry PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList + 0); + PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); // skip the newly created segments s += 2; @@ -4953,13 +4941,13 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // insert name store instructions at the end of each segment but before branch instructions for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) { - if(segIt->imlListCount == 0 ) + if(segIt->imlList.size() == 0 ) continue; // ignore empty segments // analyze segment for register usage PPCImlOptimizerUsedRegisters_t registersUsed; - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlList.size(); i++) { - PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, segIt->imlList+i, ®istersUsed); + PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, segIt->imlList.data() + i, ®istersUsed); //PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, registersUsed.readGPR1); sint32 accessedTempReg[5]; // intermediate FPRs diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp index d14c6e000..6875a5252 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp @@ -3,7 +3,7 @@ #include "PPCRecompilerIml.h" #include "PPCRecompilerX64.h" -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed) +void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed) { registersUsed->readNamedReg1 = -1; registersUsed->readNamedReg2 = -1; @@ -907,7 +907,7 @@ sint32 PPCRecompiler_getNextRegisterToReplace(PPCImlOptimizerUsedRegisters_t* re bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* 
isUsed) { PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndexStart, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, &imlSegment->imlList[imlIndexStart], ®istersUsed); // mask all registers used by this instruction uint32 instructionReservedRegisterMask = 0;//(1<<(PPC_X64_GPR_USABLE_REGISTERS+1))-1; if( registersUsed.readNamedReg1 != -1 ) @@ -957,20 +957,20 @@ bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContex bool PPCRecompiler_hasSuffixInstruction(PPCRecImlSegment_t* imlSegment) { - if( imlSegment->imlListCount == 0 ) + if (imlSegment->imlList.empty()) return false; - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+imlSegment->imlListCount-1; - if( imlInstruction->type == PPCREC_IML_TYPE_MACRO && (imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BCTR) || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_BL || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_B_FAR || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_BLRL || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_BCTRL || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_LEAVE || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_HLE || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_MFTB || - imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER || - imlInstruction->type == PPCREC_IML_TYPE_CJUMP || - imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) + const PPCRecImlInstruction_t& imlInstruction = imlSegment->imlList.back(); + if( imlInstruction.type == PPCREC_IML_TYPE_MACRO && (imlInstruction.operation == PPCREC_IML_MACRO_BLR || imlInstruction.operation == PPCREC_IML_MACRO_BCTR) || + imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_BL || + imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_B_FAR || + imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_BLRL || + imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_BCTRL || + imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_LEAVE || + imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_HLE || + imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_MFTB || + imlInstruction.type == PPCREC_IML_TYPE_PPC_ENTER || + imlInstruction.type == PPCREC_IML_TYPE_CJUMP || + imlInstruction.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) return true; return false; } @@ -981,29 +981,29 @@ void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, P sint32 imlIndexEdit = *imlIndex; PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); // name_unusedRegister = unusedRegister - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndexEdit+0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; - imlInstructionItr->crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr->operation = 
PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr->op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerNewName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; + PPCRecImlInstruction_t& imlInstructionItr = imlSegment->imlList[imlIndexEdit + 0]; + memset(&imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + imlInstructionItr.type = PPCREC_IML_TYPE_NAME_R; + imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; + imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; + imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; + imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerNewName; + imlInstructionItr.op_r_name.copyWidth = 32; + imlInstructionItr.op_r_name.flags = 0; imlIndexEdit++; // load new register if required if( replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].nameMustBeMaintained ) { PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndexEdit+0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr->crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr->op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerPreviousName;//ppcImlGenContext->mappedRegister[replacedRegisterTracker.replacedRegisterEntry[i].index]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; + PPCRecImlInstruction_t& imlInstructionItr = imlSegment->imlList[imlIndexEdit]; + memset(&imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + imlInstructionItr.type = PPCREC_IML_TYPE_R_NAME; + imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; + imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; + imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; + imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerPreviousName;//ppcImlGenContext->mappedRegister[replacedRegisterTracker.replacedRegisterEntry[i].index]; + imlInstructionItr.op_r_name.copyWidth = 32; + imlInstructionItr.op_r_name.flags = 0; imlIndexEdit += 1; } // move last entry to current one @@ -1021,17 +1021,17 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // first we remove all name load and store instructions that involve out-of-bounds registers for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - sint32 imlIndex = 0; - while( imlIndex < segIt->imlListCount ) + size_t imlIndex = 0; + while( imlIndex < segIt->imlList.size() ) { - PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+imlIndex; - if( imlInstructionItr->type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr->type == PPCREC_IML_TYPE_FPR_NAME_R ) + PPCRecImlInstruction_t& imlInstructionItr = segIt->imlList[imlIndex]; + if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) { - if( 
imlInstructionItr->op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) + if( imlInstructionItr.op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) { // convert to NO-OP instruction - imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; - imlInstructionItr->associatedPPCAddress = 0; + imlInstructionItr.type = PPCREC_IML_TYPE_NO_OP; + imlInstructionItr.associatedPPCAddress = 0; } } imlIndex++; @@ -1040,13 +1040,13 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // replace registers for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - sint32 imlIndex = 0; - while( imlIndex < segIt->imlListCount ) + size_t imlIndex = 0; + while( imlIndex < segIt->imlList.size() ) { PPCImlOptimizerUsedRegisters_t registersUsed; while( true ) { - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, segIt->imlList.data()+imlIndex, ®istersUsed); if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) { // get index of register to replace @@ -1092,13 +1092,13 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; } // replace registers that are out of range - PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, fprToReplace, unusedRegisterIndex); + PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, segIt->imlList.data() + imlIndex, fprToReplace, unusedRegisterIndex); // add load/store name after instruction PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex+1, 2); // add load/store before current instruction PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); // name_unusedRegister = unusedRegister - PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+(imlIndex+0); + PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); if( replacedRegisterIsUsed ) { @@ -1111,7 +1111,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte } else imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; - imlInstructionItr = segIt->imlList+(imlIndex+1); + imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -1120,7 +1120,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.copyWidth = 32; imlInstructionItr->op_r_name.flags = 0; // name_gprToReplace = unusedRegister - imlInstructionItr = segIt->imlList+(imlIndex+3); + imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -1129,7 +1129,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.copyWidth = 32; imlInstructionItr->op_r_name.flags = 0; // unusedRegister = name_unusedRegister - imlInstructionItr = segIt->imlList+(imlIndex+4); + imlInstructionItr = 
segIt->imlList.data() + (imlIndex + 4); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); if( replacedRegisterIsUsed ) { @@ -1222,14 +1222,15 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon for (sint32 i = 0; i < 64; i++) rCtx.ppcRegToMapping[i] = -1; PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; - sint32 idx = 0; + size_t idx = 0; sint32 currentUseIndex = 0; PPCImlOptimizerUsedRegisters_t registersUsed; - while (idx < imlSegment->imlListCount) + while (idx < imlSegment->imlList.size()) { - if ( PPCRecompiler_isSuffixInstruction(imlSegment->imlList + idx) ) + PPCRecImlInstruction_t& idxInst = imlSegment->imlList[idx]; + if ( PPCRecompiler_isSuffixInstruction(&idxInst) ) break; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList + idx, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, &idxInst, ®istersUsed); sint32 fprMatch[4]; sint32 fprReplace[4]; fprMatch[0] = -1; @@ -1276,7 +1277,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); // create unload instruction PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList + idx; + PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList.data() + idx; memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; @@ -1294,7 +1295,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon } // create load instruction PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList + idx; + PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList.data() + idx; memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; @@ -1336,7 +1337,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon } if (numReplacedOperands > 0) { - PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList + idx, fprMatch, fprReplace); + PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList.data() + idx, fprMatch, fprReplace); } // next idx++; @@ -1356,7 +1357,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon { if (rCtx.currentMapping[i].isActive == false) continue; - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList + idx; + PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList.data() + idx; memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; @@ -1387,12 +1388,12 @@ bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) { sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; - for(sint32 i=startIndex; iimlListCount; i++) + for(size_t i=startIndex; iimlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + 
PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; //nameStoreInstruction->op_r_name.registerIndex PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if( registersUsed.readNamedReg1 == registerIndex || registersUsed.readNamedReg2 == registerIndex || registersUsed.readNamedReg3 == registerIndex ) return false; if( registersUsed.writtenNamedReg1 == registerIndex ) @@ -1408,11 +1409,11 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) { sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; - for(sint32 i=startIndex; iimlListCount; i++) + for(size_t i=startIndex; iimlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if( registersUsed.readFPR1 == registerIndex || registersUsed.readFPR2 == registerIndex || registersUsed.readFPR3 == registerIndex || registersUsed.readFPR4 == registerIndex) return false; if( registersUsed.writtenFPR1 == registerIndex ) @@ -1430,9 +1431,9 @@ bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcIml sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; for(sint32 i=startIndex; i>=0; i--) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if( registersUsed.writtenNamedReg1 == registerIndex ) { if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) @@ -1450,21 +1451,20 @@ sint32 debugCallCounter1 = 0; */ bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) { - //sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; uint32 name = nameStoreInstruction->op_r_name.name; - for(sint32 i=startIndex; iimlListCount; i++) + for(size_t i=startIndex; iimlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) + const PPCRecImlInstruction_t& imlInstruction = imlSegment->imlList[i]; + if(imlInstruction.type == PPCREC_IML_TYPE_R_NAME ) { // name is loaded before being written - if( imlSegment->imlList[i].op_r_name.name == name ) + if (imlInstruction.op_r_name.name == name) return false; } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_NAME_R ) + else if(imlInstruction.type == PPCREC_IML_TYPE_NAME_R ) { // name is written before being loaded - if( imlSegment->imlList[i].op_r_name.name == name ) + if (imlInstruction.op_r_name.name == name) return true; } } @@ -1476,7 +1476,7 @@ bool 
PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcI return false; if( imlSegment->nextSegmentBranchNotTaken && PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, nameStoreInstruction, scanDepth+1) == false ) return false; - if( imlSegment->nextSegmentBranchTaken == NULL && imlSegment->nextSegmentBranchNotTaken == NULL ) + if( imlSegment->nextSegmentBranchTaken == nullptr && imlSegment->nextSegmentBranchNotTaken == nullptr) return false; return true; @@ -1490,12 +1490,12 @@ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppc sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; for(sint32 i=startIndex; i>=0; i--) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if( registersUsed.writtenFPR1 == registerIndex ) { - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_NAME ) + if(imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) return true; return false; } @@ -1568,60 +1568,59 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext { for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - for(sint32 i=0; iimlListCount; i++) + for(PPCRecImlInstruction_t& instIt : segIt->imlList) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) + if (instIt.type == PPCREC_IML_TYPE_CJUMP) { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) { - uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); + uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written segIt->crBitsRead |= (crBitFlag); } } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); + uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written segIt->crBitsRead |= (crBitFlag); } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) + else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) { segIt->crBitsRead |= 0xFFFFFFFF; } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) + else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) { - segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) + else if( instIt.type == PPCREC_IML_TYPE_CR ) { - if 
(imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || - imlInstruction->operation == PPCREC_IML_OP_CR_SET) + if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || + instIt.operation == PPCREC_IML_OP_CR_SET) { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + uint32 crBitFlag = 1 << (instIt.op_cr.crD); segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || - imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || - imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) + else if (instIt.operation == PPCREC_IML_OP_CR_OR || + instIt.operation == PPCREC_IML_OP_CR_ORC || + instIt.operation == PPCREC_IML_OP_CR_AND || + instIt.operation == PPCREC_IML_OP_CR_ANDC) { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + uint32 crBitFlag = 1 << (instIt.op_cr.crD); segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - crBitFlag = 1 << (imlInstruction->op_cr.crA); + crBitFlag = 1 << (instIt.op_cr.crA); segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - crBitFlag = 1 << (imlInstruction->op_cr.crB); + crBitFlag = 1 << (instIt.op_cr.crB); segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); } else cemu_assert_unimplemented(); } - else if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) + else if( PPCRecompilerImlAnalyzer_canTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7 ) { - segIt->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4)); + segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); } - else if( (imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) + else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) { // overwrites CR0 segIt->crBitsWritten |= (0xF<<0); @@ -1631,15 +1630,14 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext // flag instructions that write to CR where we can ignore individual CR bits for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - for(sint32 i=0; iimlListCount; i++) + for (PPCRecImlInstruction_t& instIt : segIt->imlList) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; - if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) + if( PPCRecompilerImlAnalyzer_canTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7 ) { - uint32 crBitFlags = 0xF<<((uint32)imlInstruction->crRegister*4); + uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead; - imlInstruction->crIgnoreMask = crIgnoreMask; + instIt.crIgnoreMask = crIgnoreMask; } } } @@ -1650,7 +1648,7 @@ bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenCont PPCImlOptimizerUsedRegisters_t registersUsed; for (sint32 i = startIndex; i <= endIndex; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if (registersUsed.writtenNamedReg1 == vreg) return true; @@ 
-1688,7 +1686,7 @@ sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcIml if (currentSegment->list_prevSegments.size() != 1) return -1; currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlListCount - 1; + currentIndex = currentSegment->imlList.size() - 1; segmentIterateCount++; } // scan again to make sure the register is not modified inbetween @@ -1701,13 +1699,12 @@ sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcIml while (currentIndex >= 0) { // check if register is modified - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, currentSegment->imlList+currentIndex, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, currentSegment->imlList.data() + currentIndex, ®istersUsed); if (registersUsed.writtenNamedReg1 == foundRegister) return -1; // check if end of scan reached if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) { - //foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; return foundRegister; } // previous instruction @@ -1719,7 +1716,7 @@ sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcIml if (currentSegment->list_prevSegments.size() != 1) return -1; currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlListCount - 1; + currentIndex = currentSegment->imlList.size() - 1; segmentIterateCount++; } return -1; @@ -1727,17 +1724,17 @@ sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcIml void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) { - PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList + imlIndexLoad; + PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; if (imlInstructionLoad->op_storeLoad.flags2.notExpanded) return; PPCImlOptimizerUsedRegisters_t registersUsed; - sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlListCount); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) + sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) bool foundMatch = false; sint32 lastStore = -1; for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; if (PPCRecompiler_isSuffixInstruction(imlInstruction)) { break; @@ -1802,9 +1799,9 @@ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContex { for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - for (sint32 i = 0; i < segIt->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList.data() + i; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) { PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); @@ -1819,16 +1816,16 @@ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* 
ppcImlGenContex void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexLoad, sint32 gprIndex) { - PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList + imlIndexLoad; + PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false ) return; bool foundMatch = false; PPCImlOptimizerUsedRegisters_t registersUsed; - sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlListCount); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) + sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) sint32 i = imlIndexLoad + 1; for (; i < scanRangeEnd; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; if (PPCRecompiler_isSuffixInstruction(imlInstruction)) { break; @@ -1886,9 +1883,9 @@ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenCont { for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - for (sint32 i = 0; i < segIt->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList.data() + i; if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian ) { PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); @@ -1933,21 +1930,19 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) { for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - for (sint32 i = 0; i < segIt->imlListCount; i++) + for(PPCRecImlInstruction_t& instIt : segIt->imlList) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD || imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + if (instIt.type == PPCREC_IML_TYPE_FPR_LOAD || instIt.type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { - if(imlInstruction->op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 && - imlInstruction->op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ) + if(instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 && + instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ) continue; // get GQR value - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, imlInstruction->op_storeLoad.registerGQR); + cemu_assert_debug(instIt.op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); cemu_assert(gqrIndex >= 0); if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) continue; - //uint32 gqrValue = ppcInterpreterCurrentInstance->sprNew.UGQR[gqrIndex]; uint32 gqrValue; if (!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue)) continue; @@ -1956,41 +1951,41 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) uint32 scale = (gqrValue >> 24) & 0x3F; if (scale != 0) 
continue; // only generic handler supports scale - if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) + if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) { if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0; else if (formatType == 4) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0; else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0; else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0; else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; } - else if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) + else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) { if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1; else if (formatType == 4) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1; else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1; else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1; else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; } } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE || imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { - if(imlInstruction->op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0 && - imlInstruction->op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) + if(instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0 && + instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) continue; // get GQR value - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, imlInstruction->op_storeLoad.registerGQR); + cemu_assert_debug(instIt.op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); cemu_assert(gqrIndex >= 0); if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) continue; @@ -2001,31 +1996,31 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) uint32 scale = (gqrValue >> 24) & 0x3F; if (scale != 0) continue; // only generic handler supports scale - if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) + if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) { if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0; else if (formatType == 4) - 
imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0; else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0; else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0; else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; } - else if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) + else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) { if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; else if (formatType == 4) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1; else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1; else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1; else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; + instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; } } } @@ -2065,9 +2060,9 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) // scan backwards until we find the instruction that sets the CR sint32 crSetterInstructionIndex = -1; sint32 unsafeInstructionIndex = -1; - for (sint32 i = imlSegment->imlListCount-2; i >= 0; i--) + for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; PPCRecompilerImlAnalyzer_getCRTracking(imlInstruction, &crTracking); if (crTracking.readCRBits != 0) return; // dont handle complex cases for now @@ -2101,7 +2096,7 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) // check if we can move the CR setter instruction to after unsafeInstructionIndex PPCRecCRTracking_t crTrackingSetter = crTracking; PPCImlOptimizerUsedRegisters_t regTrackingCRSetter; - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList+crSetterInstructionIndex, ®TrackingCRSetter); + PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + crSetterInstructionIndex, ®TrackingCRSetter); if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) return; // we don't handle FPR dependency yet so just ignore FPR instructions PPCImlOptimizerUsedRegisters_t registerTracking; @@ -2110,7 +2105,7 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) // CR setter does write GPR for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) { - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + i, ®isterTracking); + PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + i, ®isterTracking); // reads register written by CR setter? 
if (PPCRecompilerAnalyzer_checkForGPROverwrite(®isterTracking, ®TrackingCRSetter)) { @@ -2131,7 +2126,7 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) // CR setter does not write GPR for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) { - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + i, ®isterTracking); + PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + i, ®isterTracking); // writes register read by CR setter? if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) { @@ -2146,8 +2141,8 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) assert_dbg(); #endif PPCRecImlInstruction_t* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); - memcpy(newCRSetterInstruction, imlSegment->imlList + crSetterInstructionIndex, sizeof(PPCRecImlInstruction_t)); - PPCRecompilerImlGen_generateNewInstruction_noOp(NULL, imlSegment->imlList + crSetterInstructionIndex); + memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(PPCRecImlInstruction_t)); + PPCRecompilerImlGen_generateNewInstruction_noOp(nullptr, imlSegment->imlList.data() + crSetterInstructionIndex); } /* diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp index 3158303a1..8d41ad377 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp @@ -123,7 +123,7 @@ typedef struct void PPCRecRA_insertGPRLoadInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + 0); + PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -136,10 +136,10 @@ void PPCRecRA_insertGPRLoadInstruction(PPCRecImlSegment_t* imlSegment, sint32 in void PPCRecRA_insertGPRLoadInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* loadList, sint32 loadCount) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadCount); - memset(imlSegment->imlList + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*loadCount); + memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*loadCount); for (sint32 i = 0; i < loadCount; i++) { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + i); + PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = (uint8)loadList[i].registerIndex; @@ -152,7 +152,7 @@ void PPCRecRA_insertGPRLoadInstructions(PPCRecImlSegment_t* imlSegment, sint32 i void PPCRecRA_insertGPRStoreInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + 0); + 
PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -165,10 +165,10 @@ void PPCRecRA_insertGPRStoreInstruction(PPCRecImlSegment_t* imlSegment, sint32 i void PPCRecRA_insertGPRStoreInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* storeList, sint32 storeCount) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeCount); - memset(imlSegment->imlList + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*storeCount); + memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*storeCount); for (sint32 i = 0; i < storeCount; i++) { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + i); + PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -767,7 +767,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, raLiveRangeInfo_t liveInfo; liveInfo.liveRangesCount = 0; sint32 index = 0; - sint32 suffixInstructionCount = (imlSegment->imlListCount > 0 && PPCRecompiler_isSuffixInstruction(imlSegment->imlList + imlSegment->imlListCount - 1)) ? 1 : 0; + sint32 suffixInstructionCount = (imlSegment->imlList.size() > 0 && PPCRecompiler_isSuffixInstruction(imlSegment->imlList.data() + imlSegment->imlList.size() - 1)) ? 1 : 0; // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; //for (auto& subrange : imlSegment->raInfo.list_subranges) @@ -793,7 +793,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } // process instructions - while(index < imlSegment->imlListCount+1) + while(index < imlSegment->imlList.size() + 1) { // expire ranges for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) @@ -808,7 +808,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // store GPR if (liverange->hasStore) { - PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlListCount - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name); + PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name); index++; } // remove entry @@ -828,7 +828,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // load GPR if (subrangeItr->_noLoad == false) { - PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min(index, imlSegment->imlListCount - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name); + PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name); index++; subrangeItr->start.index--; } @@ -839,10 +839,10 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } // replace registers - if (index < imlSegment->imlListCount) + if 
(index < imlSegment->imlList.size()) { PPCImlOptimizerUsedRegisters_t gprTracking; - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); + PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + index, &gprTracking); sint32 inputGpr[4]; inputGpr[0] = gprTracking.gpr[0]; @@ -863,7 +863,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, replaceGpr[f] = virtualReg2PhysReg[virtualRegister]; cemu_assert_debug(replaceGpr[f] >= 0); } - PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList + index, inputGpr, replaceGpr); + PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList.data() + index, inputGpr, replaceGpr); } // next iml instruction index++; @@ -898,7 +898,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, } if (storeLoadListLength > 0) { - PPCRecRA_insertGPRStoreInstructions(imlSegment, imlSegment->imlListCount - suffixInstructionCount, loadStoreList, storeLoadListLength); + PPCRecRA_insertGPRStoreInstructions(imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList, storeLoadListLength); } // load subranges for next segments subrangeItr = imlSegment->raInfo.linkedList_allSubranges; @@ -925,7 +925,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, } if (storeLoadListLength > 0) { - PPCRecRA_insertGPRLoadInstructions(imlSegment, imlSegment->imlListCount - suffixInstructionCount, loadStoreList, storeLoadListLength); + PPCRecRA_insertGPRLoadInstructions(imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList, storeLoadListLength); } } @@ -998,7 +998,6 @@ void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext) ppcImlGenContext->raInfo.list_ranges = std::vector(); - // calculate liveness PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); @@ -1024,23 +1023,23 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, imlSegment->raDistances.reg[i].usageEnd = INT_MIN; } // scan instructions for usage range - sint32 index = 0; + size_t index = 0; PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) + while (index < imlSegment->imlList.size()) { // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) + if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList.data() + index)) break; // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); + PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList.data() + index, &gprTracking); for (sint32 t = 0; t < 4; t++) { sint32 virtualRegister = gprTracking.gpr[t]; if (virtualRegister < 0) continue; cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR); - imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction + imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction + imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, 
index + 1); // index after instruction } // next instruction index++; @@ -1117,15 +1116,15 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, #endif } // parse instructions and convert to locations - sint32 index = 0; + size_t index = 0; PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) + while (index < imlSegment->imlList.size()) { // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) + if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList.data() + index)) break; // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); + PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList.data() + index, &gprTracking); // handle accessed GPR for (sint32 t = 0; t < 4; t++) { @@ -1136,9 +1135,9 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, // add location PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); #ifdef CEMU_DEBUG_ASSERT - if (index < vGPR2Subrange[virtualRegister]->start.index) + if ((sint32)index < vGPR2Subrange[virtualRegister]->start.index) assert_dbg(); - if (index + 1 > vGPR2Subrange[virtualRegister]->end.index) + if ((sint32)index + 1 > vGPR2Subrange[virtualRegister]->end.index) assert_dbg(); #endif } @@ -1205,7 +1204,7 @@ void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCR if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) { // measure distance to end of segment - distanceLeft -= currentSegment->imlListCount; + distanceLeft -= (sint32)currentSegment->imlList.size(); if (distanceLeft > 0) { if (currentSegment->nextSegmentBranchNotTaken) @@ -1220,7 +1219,7 @@ void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCR // measure distance to range if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) { - if (distanceLeft < currentSegment->imlListCount) + if (distanceLeft < (sint32)currentSegment->imlList.size()) return; // range too far away } else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) @@ -1243,7 +1242,7 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRe if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) instructionsUntilEndOfSeg = 0; else - instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd; + instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - currentSegment->raDistances.reg[vGPR].usageEnd; #ifdef CEMU_DEBUG_ASSERT if (instructionsUntilEndOfSeg < 0) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index 05fd93e7e..c8b5a827a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -102,7 +102,7 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); PPCRecImlSegment_t* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; // handle empty segment - if( imlSegment->imlListCount == 0 ) + if( imlSegment->imlList.empty()) { if (isLastSegment == false) PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); 
// continue execution to next segment @@ -111,7 +111,7 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) continue; } // check last instruction of segment - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+(imlSegment->imlListCount-1); + PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) { // find destination segment by ppc jump address @@ -135,9 +135,7 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) else { // all other instruction types do not branch - //imlSegment->nextSegment[0] = nextSegment; PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment); - //imlSegment->nextSegmentIsUncertain = true; } } } @@ -157,7 +155,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList + 0); + PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList.data() + 0); PPCRecompilerIml_setLinkBranchTaken(entrySegment, imlSegment); // remove enterable flag from original segment imlSegment->isEnterable = false; @@ -168,7 +166,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment) { - if (imlSegment->imlListCount == 0) + if (imlSegment->imlList.empty()) return nullptr; - return imlSegment->imlList + (imlSegment->imlListCount - 1); + return imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp index b2d934c8f..85e19371b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp @@ -2302,9 +2302,9 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { segIt->x64Offset = x64GenContext.codeBufferIndex; - for(sint32 i=0; iimlListCount; i++) + for(size_t i=0; iimlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList.data() + i; if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) { From b1b46f3d1f4735a08d7322a2ee141e2cfd2fe706 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 4 Nov 2022 22:44:15 +0100 Subject: [PATCH 04/64] PPCRec: Move Segment and Instruction struct into separate files --- src/Cafe/CMakeLists.txt | 7 +- .../Recompiler/IML/IMLInstruction.cpp | 0 .../Espresso/Recompiler/IML/IMLInstruction.h | 387 ++++++++++++++++++ .../HW/Espresso/Recompiler/IML/IMLSegment.cpp | 0 .../HW/Espresso/Recompiler/IML/IMLSegment.h | 40 ++ .../HW/Espresso/Recompiler/PPCRecompiler.h | 199 +-------- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 234 ----------- .../Recompiler/PPCRecompilerImlGen.cpp | 19 - .../Recompiler/PPCRecompilerImlOptimizer.cpp | 13 - .../Espresso/Recompiler/PPCRecompilerX64.cpp | 25 -- 10 files changed, 435 insertions(+), 489 deletions(-) create mode 100644 src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp create mode 100644 
src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h create mode 100644 src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp create mode 100644 src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 58b4ba8bc..6cb13acb2 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -67,14 +67,17 @@ add_library(CemuCafe HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h HW/Espresso/Recompiler/PPCRecompiler.cpp HW/Espresso/Recompiler/PPCRecompiler.h + HW/Espresso/Recompiler/IML/IMLSegment.cpp + HW/Espresso/Recompiler/IML/IMLSegment.h + HW/Espresso/Recompiler/IML/IMLInstruction.cpp + HW/Espresso/Recompiler/IML/IMLInstruction.h HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp HW/Espresso/Recompiler/PPCRecompilerIml.h HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp - HW/Espresso/Recompiler/PPCRecompilerImlRanges.h - + HW/Espresso/Recompiler/PPCRecompilerImlRanges.h HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h new file mode 100644 index 000000000..74427731e --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -0,0 +1,387 @@ +#pragma once + +#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0) +#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1) +#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set single-precision load instructions to indicate that the value should not be rounded to double-precision +#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used + + +enum +{ + PPCREC_IML_OP_ASSIGN, // '=' operator + PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap + PPCREC_IML_OP_ADD, // '+' operator + PPCREC_IML_OP_SUB, // '-' operator + PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit + PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr) + PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr) + PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply) + PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result + PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result + PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide) + PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide) + PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit + PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit + PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag + PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag + // assign operators with cast + PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend + PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend + // binary operation + PPCREC_IML_OP_OR, // '|' operator + 
PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first + PPCREC_IML_OP_AND, // '&' operator + PPCREC_IML_OP_XOR, // '^' operator + PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator + PPCREC_IML_OP_LEFT_SHIFT, // shift left operator + PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned) + PPCREC_IML_OP_NOT, // complement each bit + PPCREC_IML_OP_NEG, // negate + // ppc + PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask) + PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag) + PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) + PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) + PPCREC_IML_OP_CNTLZW, + PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry) + PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 + PPCREC_IML_OP_MFCR, // copy cr to gpr + PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) + // condition register + PPCREC_IML_OP_CR_CLEAR, // clear cr bit + PPCREC_IML_OP_CR_SET, // set cr bit + PPCREC_IML_OP_CR_OR, // OR cr bits + PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first + PPCREC_IML_OP_CR_AND, // AND cr bits + PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first + // FPU + PPCREC_IML_OP_FPR_ADD_BOTTOM, + PPCREC_IML_OP_FPR_ADD_PAIR, + PPCREC_IML_OP_FPR_SUB_PAIR, + PPCREC_IML_OP_FPR_SUB_BOTTOM, + PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, + PPCREC_IML_OP_FPR_MULTIPLY_PAIR, + PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, + PPCREC_IML_OP_FPR_DIVIDE_PAIR, + PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, + PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, + PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, + PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched + PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched + PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched + PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, + PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half + PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half + PPCREC_IML_OP_FPR_FCMPO_BOTTOM, + PPCREC_IML_OP_FPR_FCMPU_BOTTOM, + PPCREC_IML_OP_FPR_FCMPU_TOP, + PPCREC_IML_OP_FPR_NEGATE_BOTTOM, + PPCREC_IML_OP_FPR_NEGATE_PAIR, + PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) + PPCREC_IML_OP_FPR_ABS_PAIR, + PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy) + PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy) + PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0) + PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register) + PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision + PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, + PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, + PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A + PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A + // PS + PPCREC_IML_OP_FPR_SUM0, + PPCREC_IML_OP_FPR_SUM1, +}; + +#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) + +enum +{ + PPCREC_IML_MACRO_BLR, // macro for BLR instruction code + PPCREC_IML_MACRO_BLRL, // macro for BLRL 
instruction code + PPCREC_IML_MACRO_BCTR, // macro for BCTR instruction code + PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code + PPCREC_IML_MACRO_BL, // call to different function (can be within same function) + PPCREC_IML_MACRO_B_FAR, // branch to different function + PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount + PPCREC_IML_MACRO_HLE, // HLE function call + PPCREC_IML_MACRO_MFTB, // get TB register value (low or high) + PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpeter + // debugging + PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak +}; + +enum +{ + PPCREC_JUMP_CONDITION_NONE, + PPCREC_JUMP_CONDITION_E, // equal / zero + PPCREC_JUMP_CONDITION_NE, // not equal / not zero + PPCREC_JUMP_CONDITION_LE, // less or equal + PPCREC_JUMP_CONDITION_L, // less + PPCREC_JUMP_CONDITION_GE, // greater or equal + PPCREC_JUMP_CONDITION_G, // greater + // special case: + PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling + PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow + +}; + +enum +{ + PPCREC_CR_MODE_COMPARE_SIGNED, + PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare + // others: PPCREC_CR_MODE_ARITHMETIC, + PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code) + PPCREC_CR_MODE_LOGICAL, +}; + +enum +{ + PPCREC_IML_TYPE_NONE, + PPCREC_IML_TYPE_NO_OP, // no-op instruction + PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each ppc instruction) + PPCREC_IML_TYPE_R_R, // r* (op) *r + PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* + PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* + PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] + PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*] + PPCREC_IML_TYPE_STORE, // [r*+s32*] = r* + PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r* + PPCREC_IML_TYPE_R_NAME, // r* = name + PPCREC_IML_TYPE_NAME_R, // name* = r* + PPCREC_IML_TYPE_R_S32, // r* (op) imm + PPCREC_IML_TYPE_MACRO, + PPCREC_IML_TYPE_CJUMP, // conditional jump + PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0 + PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable + PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) + // conditional + PPCREC_IML_TYPE_CONDITIONAL_R_S32, + // FPR + PPCREC_IML_TYPE_FPR_R_NAME, // name = f* + PPCREC_IML_TYPE_FPR_NAME_R, // f* = name + PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) + PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) + PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode) + PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode) + PPCREC_IML_TYPE_FPR_R_R, + PPCREC_IML_TYPE_FPR_R_R_R, + PPCREC_IML_TYPE_FPR_R_R_R_R, + PPCREC_IML_TYPE_FPR_R, +}; + +enum +{ + PPCREC_NAME_NONE, + PPCREC_NAME_TEMPORARY, + PPCREC_NAME_R0 = 1000, + PPCREC_NAME_SPR0 = 2000, + PPCREC_NAME_FPR0 = 3000, + PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7 + //PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away) +}; + +// special cases for LOAD/STORE +#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) +#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) +#define 
PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1 +#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2 +#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3 +#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1 +#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2 +#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3 + +#define PPC_REC_INVALID_REGISTER 0xFF + +#define PPCREC_CR_BIT_LT 0 +#define PPCREC_CR_BIT_GT 1 +#define PPCREC_CR_BIT_EQ 2 +#define PPCREC_CR_BIT_SO 3 + +enum +{ + // fpr load + PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, + PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, + PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, + PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, + PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1, + PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, + PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1, + PPCREC_FPR_LD_MODE_PSQ_S16_PS0, + PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1, + PPCREC_FPR_LD_MODE_PSQ_U16_PS0, + PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1, + PPCREC_FPR_LD_MODE_PSQ_S8_PS0, + PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1, + PPCREC_FPR_LD_MODE_PSQ_U8_PS0, + PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1, + // fpr store + PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0 + PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0 + + PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0 + + PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1, + PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, + PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1, + PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, + PPCREC_FPR_ST_MODE_PSQ_S8_PS0, + PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1, + PPCREC_FPR_ST_MODE_PSQ_U8_PS0, + PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1, + PPCREC_FPR_ST_MODE_PSQ_U16_PS0, + PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1, + PPCREC_FPR_ST_MODE_PSQ_S16_PS0, + PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, +}; + +struct PPCRecImlInstruction_t +{ + uint8 type; + uint8 operation; + uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr. + uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior + uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated + uint32 associatedPPCAddress; // ppc address that is associated with this instruction + union + { + struct + { + uint8 _padding[7]; + }padding; + struct + { + // R (op) A [update cr* in mode *] + uint8 registerResult; + uint8 registerA; + }op_r_r; + struct + { + // R = A (op) B [update cr* in mode *] + uint8 registerResult; + uint8 registerA; + uint8 registerB; + }op_r_r_r; + struct + { + // R = A (op) immS32 [update cr* in mode *] + uint8 registerResult; + uint8 registerA; + sint32 immS32; + }op_r_r_s32; + struct + { + // R/F = NAME or NAME = R/F + uint8 registerIndex; + uint8 copyWidth; + uint32 name; + uint8 flags; + }op_r_name; + struct + { + // R (op) s32 [update cr* in mode *] + uint8 registerIndex; + sint32 immS32; + }op_r_immS32; + struct + { + uint32 address; + uint8 flags; + }op_jumpmark; + struct + { + uint32 param; + uint32 param2; + uint16 paramU16; + }op_macro; + struct + { + uint32 jumpmarkAddress; + bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress + uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? 
-> Cleanup) + uint8 crRegisterIndex; + uint8 crBitIndex; + bool bitMustBeSet; + }op_conditionalJump; + struct + { + uint8 registerData; + uint8 registerMem; + uint8 registerMem2; + uint8 registerGQR; + uint8 copyWidth; + //uint8 flags; + struct + { + bool swapEndian : 1; + bool signExtend : 1; + bool notExpanded : 1; // for floats + }flags2; + uint8 mode; // transfer mode (copy width, ps0/ps1 behavior) + sint32 immS32; + }op_storeLoad; + struct + { + struct + { + uint8 registerMem; + sint32 immS32; + }src; + struct + { + uint8 registerMem; + sint32 immS32; + }dst; + uint8 copyWidth; + }op_mem2mem; + struct + { + uint8 registerResult; + uint8 registerOperand; + uint8 flags; + }op_fpr_r_r; + struct + { + uint8 registerResult; + uint8 registerOperandA; + uint8 registerOperandB; + uint8 flags; + }op_fpr_r_r_r; + struct + { + uint8 registerResult; + uint8 registerOperandA; + uint8 registerOperandB; + uint8 registerOperandC; + uint8 flags; + }op_fpr_r_r_r_r; + struct + { + uint8 registerResult; + //uint8 flags; + }op_fpr_r; + struct + { + uint32 ppcAddress; + uint32 x64Offset; + }op_ppcEnter; + struct + { + uint8 crD; // crBitIndex (result) + uint8 crA; // crBitIndex + uint8 crB; // crBitIndex + }op_cr; + // conditional operations (emitted if supported by target platform) + struct + { + // r_s32 + uint8 registerIndex; + sint32 immS32; + // condition + uint8 crRegisterIndex; + uint8 crBitIndex; + bool bitMustBeSet; + }op_conditional_r_s32; + }; +}; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h new file mode 100644 index 000000000..a9a734270 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -0,0 +1,40 @@ +#pragma once + +struct PPCRecImlSegment_t +{ + sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) + sint32 startOffset{}; // offset to first instruction in iml instruction list + sint32 count{}; // number of instructions in segment + uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address) + uint32 x64Offset{}; // x64 code offset of segment start + uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly) + // list of intermediate instructions in this segment + std::vector imlList; + // segment link + PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch + PPCRecImlSegment_t* nextSegmentBranchTaken{}; + bool nextSegmentIsUncertain{}; + sint32 loopDepth{}; + std::vector list_prevSegments{}; + // PPC range of segment + uint32 ppcAddrMin{}; + uint32 ppcAddrMax{}; + // enterable segments + bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) + uint32 enterPPCAddress{}; // used if isEnterable is true + // jump destination segments + bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps + uint32 jumpDestinationPPCAddress{}; + // PPC FPR use mask + bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR + // CR use mask + uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten) + uint32 crBitsRead{}; // all bits that are read in this segment + uint32 crBitsWritten{}; // 
bits that are written in this segment + // register allocator info + PPCSegmentRegisterAllocatorInfo_t raInfo{}; + PPCRecVGPRDistances_t raDistances{}; + bool raRangeExtendProcessed{}; + // segment points + ppcRecompilerSegmentPoint_t* segmentPointList{}; +}; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 7798df67e..d5d8bead8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -26,167 +26,12 @@ typedef struct std::vector list_ranges; }PPCRecFunction_t; -#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0) -#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1) -#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set single-precision load instructions to indicate that the value should not be rounded to double-precision -#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used - -typedef struct -{ - uint8 type; - uint8 operation; - uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr. - uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior - uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated - uint32 associatedPPCAddress; // ppc address that is associated with this instruction - union - { - struct - { - uint8 _padding[7]; - }padding; - struct - { - // R (op) A [update cr* in mode *] - uint8 registerResult; - uint8 registerA; - }op_r_r; - struct - { - // R = A (op) B [update cr* in mode *] - uint8 registerResult; - uint8 registerA; - uint8 registerB; - }op_r_r_r; - struct - { - // R = A (op) immS32 [update cr* in mode *] - uint8 registerResult; - uint8 registerA; - sint32 immS32; - }op_r_r_s32; - struct - { - // R/F = NAME or NAME = R/F - uint8 registerIndex; - uint8 copyWidth; - uint32 name; - uint8 flags; - }op_r_name; - struct - { - // R (op) s32 [update cr* in mode *] - uint8 registerIndex; - sint32 immS32; - }op_r_immS32; - struct - { - uint32 address; - uint8 flags; - }op_jumpmark; - struct - { - uint32 param; - uint32 param2; - uint16 paramU16; - }op_macro; - struct - { - uint32 jumpmarkAddress; - bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress - uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? 
-> Cleanup) - uint8 crRegisterIndex; - uint8 crBitIndex; - bool bitMustBeSet; - }op_conditionalJump; - struct - { - uint8 registerData; - uint8 registerMem; - uint8 registerMem2; - uint8 registerGQR; - uint8 copyWidth; - //uint8 flags; - struct - { - bool swapEndian : 1; - bool signExtend : 1; - bool notExpanded : 1; // for floats - }flags2; - uint8 mode; // transfer mode (copy width, ps0/ps1 behavior) - sint32 immS32; - }op_storeLoad; - struct - { - struct - { - uint8 registerMem; - sint32 immS32; - }src; - struct - { - uint8 registerMem; - sint32 immS32; - }dst; - uint8 copyWidth; - }op_mem2mem; - struct - { - uint8 registerResult; - uint8 registerOperand; - uint8 flags; - }op_fpr_r_r; - struct - { - uint8 registerResult; - uint8 registerOperandA; - uint8 registerOperandB; - uint8 flags; - }op_fpr_r_r_r; - struct - { - uint8 registerResult; - uint8 registerOperandA; - uint8 registerOperandB; - uint8 registerOperandC; - uint8 flags; - }op_fpr_r_r_r_r; - struct - { - uint8 registerResult; - //uint8 flags; - }op_fpr_r; - struct - { - uint32 ppcAddress; - uint32 x64Offset; - }op_ppcEnter; - struct - { - uint8 crD; // crBitIndex (result) - uint8 crA; // crBitIndex - uint8 crB; // crBitIndex - }op_cr; - // conditional operations (emitted if supported by target platform) - struct - { - // r_s32 - uint8 registerIndex; - sint32 immS32; - // condition - uint8 crRegisterIndex; - uint8 crBitIndex; - bool bitMustBeSet; - }op_conditional_r_s32; - }; -}PPCRecImlInstruction_t; - -typedef struct _PPCRecImlSegment_t PPCRecImlSegment_t; +#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h" typedef struct _ppcRecompilerSegmentPoint_t { sint32 index; - PPCRecImlSegment_t* imlSegment; + struct PPCRecImlSegment_t* imlSegment; _ppcRecompilerSegmentPoint_t* next; _ppcRecompilerSegmentPoint_t* prev; }ppcRecompilerSegmentPoint_t; @@ -260,45 +105,7 @@ struct PPCRecVGPRDistances_t bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{}; }; -typedef struct _PPCRecImlSegment_t -{ - sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) - sint32 startOffset{}; // offset to first instruction in iml instruction list - sint32 count{}; // number of instructions in segment - uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address) - uint32 x64Offset{}; // x64 code offset of segment start - uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly) - // list of intermediate instructions in this segment - std::vector imlList; - // segment link - _PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch - _PPCRecImlSegment_t* nextSegmentBranchTaken{}; - bool nextSegmentIsUncertain{}; - sint32 loopDepth{}; - //sList_t* list_prevSegments; - std::vector<_PPCRecImlSegment_t*> list_prevSegments{}; - // PPC range of segment - uint32 ppcAddrMin{}; - uint32 ppcAddrMax{}; - // enterable segments - bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) - uint32 enterPPCAddress{}; // used if isEnterable is true - // jump destination segments - bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps - uint32 jumpDestinationPPCAddress{}; - // PPC FPR use mask - bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR - // CR use mask - uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten) - uint32 
crBitsRead{}; // all bits that are read in this segment - uint32 crBitsWritten{}; // bits that are written in this segment - // register allocator info - PPCSegmentRegisterAllocatorInfo_t raInfo{}; - PPCRecVGPRDistances_t raDistances{}; - bool raRangeExtendProcessed{}; - // segment points - ppcRecompilerSegmentPoint_t* segmentPointList{}; -}PPCRecImlSegment_t; +#include "Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h" struct ppcImlGenContext_t { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 036c448e5..41c1bf38a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -1,239 +1,6 @@ #define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example) -enum -{ - PPCREC_IML_OP_ASSIGN, // '=' operator - PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap - PPCREC_IML_OP_ADD, // '+' operator - PPCREC_IML_OP_SUB, // '-' operator - PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit - PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr) - PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr) - PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply) - PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result - PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result - PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide) - PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide) - PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit - PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit - PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag - PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag - // assign operators with cast - PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend - PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend - // binary operation - PPCREC_IML_OP_OR, // '|' operator - PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first - PPCREC_IML_OP_AND, // '&' operator - PPCREC_IML_OP_XOR, // '^' operator - PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator - PPCREC_IML_OP_LEFT_SHIFT, // shift left operator - PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned) - PPCREC_IML_OP_NOT, // complement each bit - PPCREC_IML_OP_NEG, // negate - // ppc - PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask) - PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag) - PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) - PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) - PPCREC_IML_OP_CNTLZW, - PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry) - PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 - PPCREC_IML_OP_MFCR, // copy cr to gpr - PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) - // condition register - PPCREC_IML_OP_CR_CLEAR, // clear cr bit - PPCREC_IML_OP_CR_SET, // set cr bit - PPCREC_IML_OP_CR_OR, // OR cr bits - PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first - 
PPCREC_IML_OP_CR_AND, // AND cr bits - PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first - // FPU - PPCREC_IML_OP_FPR_ADD_BOTTOM, - PPCREC_IML_OP_FPR_ADD_PAIR, - PPCREC_IML_OP_FPR_SUB_PAIR, - PPCREC_IML_OP_FPR_SUB_BOTTOM, - PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, - PPCREC_IML_OP_FPR_MULTIPLY_PAIR, - PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, - PPCREC_IML_OP_FPR_DIVIDE_PAIR, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched - PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, - PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half - PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half - PPCREC_IML_OP_FPR_FCMPO_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_TOP, - PPCREC_IML_OP_FPR_NEGATE_BOTTOM, - PPCREC_IML_OP_FPR_NEGATE_PAIR, - PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) - PPCREC_IML_OP_FPR_ABS_PAIR, - PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy) - PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy) - PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0) - PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register) - PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision - PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, - PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, - PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A - PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A - // PS - PPCREC_IML_OP_FPR_SUM0, - PPCREC_IML_OP_FPR_SUM1, -}; - -#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) - -enum -{ - PPCREC_IML_MACRO_BLR, // macro for BLR instruction code - PPCREC_IML_MACRO_BLRL, // macro for BLRL instruction code - PPCREC_IML_MACRO_BCTR, // macro for BCTR instruction code - PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code - PPCREC_IML_MACRO_BL, // call to different function (can be within same function) - PPCREC_IML_MACRO_B_FAR, // branch to different function - PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount - PPCREC_IML_MACRO_HLE, // HLE function call - PPCREC_IML_MACRO_MFTB, // get TB register value (low or high) - PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpeter - // debugging - PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak -}; - -enum -{ - PPCREC_JUMP_CONDITION_NONE, - PPCREC_JUMP_CONDITION_E, // equal / zero - PPCREC_JUMP_CONDITION_NE, // not equal / not zero - PPCREC_JUMP_CONDITION_LE, // less or equal - PPCREC_JUMP_CONDITION_L, // less - PPCREC_JUMP_CONDITION_GE, // greater or equal - PPCREC_JUMP_CONDITION_G, // greater - // special case: - PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling - PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow - -}; - -enum -{ - PPCREC_CR_MODE_COMPARE_SIGNED, - PPCREC_CR_MODE_COMPARE_UNSIGNED, // 
alias logic compare - // others: PPCREC_CR_MODE_ARITHMETIC, - PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code) - PPCREC_CR_MODE_LOGICAL, -}; - -enum -{ - PPCREC_IML_TYPE_NONE, - PPCREC_IML_TYPE_NO_OP, // no-op instruction - PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each ppc instruction) - PPCREC_IML_TYPE_R_R, // r* (op) *r - PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* - PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* - PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] - PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*] - PPCREC_IML_TYPE_STORE, // [r*+s32*] = r* - PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r* - PPCREC_IML_TYPE_R_NAME, // r* = name - PPCREC_IML_TYPE_NAME_R, // name* = r* - PPCREC_IML_TYPE_R_S32, // r* (op) imm - PPCREC_IML_TYPE_MACRO, - PPCREC_IML_TYPE_CJUMP, // conditional jump - PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0 - PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable - PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) - // conditional - PPCREC_IML_TYPE_CONDITIONAL_R_S32, - // FPR - PPCREC_IML_TYPE_FPR_R_NAME, // name = f* - PPCREC_IML_TYPE_FPR_NAME_R, // f* = name - PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) - PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) - PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode) - PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode) - PPCREC_IML_TYPE_FPR_R_R, - PPCREC_IML_TYPE_FPR_R_R_R, - PPCREC_IML_TYPE_FPR_R_R_R_R, - PPCREC_IML_TYPE_FPR_R, - // special - PPCREC_IML_TYPE_MEM2MEM, // memory to memory copy (deprecated) - -}; - -enum -{ - PPCREC_NAME_NONE, - PPCREC_NAME_TEMPORARY, - PPCREC_NAME_R0 = 1000, - PPCREC_NAME_SPR0 = 2000, - PPCREC_NAME_FPR0 = 3000, - PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7 - //PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away) -}; - -// special cases for LOAD/STORE -#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) -#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) -#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1 -#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2 -#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3 -#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1 -#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2 -#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3 - -#define PPC_REC_INVALID_REGISTER 0xFF - -#define PPCREC_CR_BIT_LT 0 -#define PPCREC_CR_BIT_GT 1 -#define PPCREC_CR_BIT_EQ 2 -#define PPCREC_CR_BIT_SO 3 - -enum -{ - // fpr load - PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, - PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, - PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U16_PS0, - PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1, - // fpr store - PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 
1 single precision float from ps0 - PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0 - - PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0 - - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, -}; - bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses); void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor @@ -267,7 +34,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe // IML instruction generation (new style, can generate new instructions but also overwrite existing ones) void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth); void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index e31f196cf..791f32cfb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -280,21 +280,6 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } -void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth) -{ - // copy from memory to memory - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_MEM2MEM; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_mem2mem.src.registerMem = srcMemReg; - imlInstruction->op_mem2mem.src.immS32 = srcImmS32; - imlInstruction->op_mem2mem.dst.registerMem = dstMemReg; - imlInstruction->op_mem2mem.dst.immS32 = dstImmS32; - imlInstruction->op_mem2mem.copyWidth = copyWidth; -} - uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { if( mappedName == PPCREC_NAME_NONE ) @@ -3234,10 +3219,6 @@ void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segment else strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); } - else if (inst.type == PPCREC_IML_TYPE_MEM2MEM) - { - strOutput.addFmt("{} [t{}+{}] = [t{}+{}]", inst.op_mem2mem.copyWidth, inst.op_mem2mem.dst.registerMem, inst.op_mem2mem.dst.immS32, inst.op_mem2mem.src.registerMem, inst.op_mem2mem.src.immS32); - } else if( inst.type == PPCREC_IML_TYPE_CJUMP ) { 
if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp index 6875a5252..30b76e6a8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp @@ -142,11 +142,6 @@ void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, cons if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - registersUsed->readNamedReg1 = imlInstruction->op_mem2mem.src.registerMem; - registersUsed->readNamedReg2 = imlInstruction->op_mem2mem.dst.registerMem; - } else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) { registersUsed->writtenNamedReg1 = imlInstruction->op_storeLoad.registerData; @@ -655,10 +650,6 @@ void PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGen { // not affected } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - // not affected - } else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED) { // not affected @@ -777,10 +768,6 @@ void PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, { // not affected } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - // not affected - } else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) { // not affected diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp index 85e19371b..00cc8cdee 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp @@ -675,27 +675,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, return false; } -/* - * Copy byte/word/dword from memory to memory - */ -void PPCRecompilerX64Gen_imlInstruction_mem2mem(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - sint32 realSrcMemReg = tempToRealRegister(imlInstruction->op_mem2mem.src.registerMem); - sint32 realSrcMemImm = imlInstruction->op_mem2mem.src.immS32; - sint32 realDstMemReg = tempToRealRegister(imlInstruction->op_mem2mem.dst.registerMem); - sint32 realDstMemImm = imlInstruction->op_mem2mem.dst.immS32; - // PPCRecompilerX64Gen_crConditionFlags_forget() is not needed here, since MOVs don't affect eflags - if (imlInstruction->op_mem2mem.copyWidth == 32) - { - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, realSrcMemReg, realSrcMemImm); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realDstMemReg, realDstMemImm, REG_RESV_TEMP); - } - else - { - assert_dbg(); - } -} - bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) { if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) @@ -2395,10 +2374,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - PPCRecompilerX64Gen_imlInstruction_mem2mem(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) { if( 
PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) From 5b2bc7e03a9fbb12a2fe8385b368ee91ab1c3c1c Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 5 Nov 2022 03:45:54 +0100 Subject: [PATCH 05/64] PPCRec: Rename IML structs for better clarity --- .../Espresso/Recompiler/IML/IMLInstruction.h | 48 +-- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 10 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 2 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 19 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 42 +-- .../Recompiler/PPCRecompilerImlAnalyzer.cpp | 12 +- .../Recompiler/PPCRecompilerImlGen.cpp | 291 +++++++++--------- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 16 +- .../Recompiler/PPCRecompilerImlOptimizer.cpp | 158 +++++----- .../Recompiler/PPCRecompilerImlRanges.cpp | 6 +- .../Recompiler/PPCRecompilerImlRanges.h | 4 +- .../PPCRecompilerImlRegisterAllocator.cpp | 110 +++---- .../Recompiler/PPCRecompilerIntermediate.cpp | 28 +- .../Espresso/Recompiler/PPCRecompilerX64.cpp | 38 +-- .../HW/Espresso/Recompiler/PPCRecompilerX64.h | 24 +- .../Recompiler/PPCRecompilerX64FPU.cpp | 16 +- 16 files changed, 398 insertions(+), 426 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 74427731e..e92fc6110 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -236,7 +236,7 @@ enum PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, }; -struct PPCRecImlInstruction_t +struct IMLInstruction { uint8 type; uint8 operation; @@ -274,9 +274,7 @@ struct PPCRecImlInstruction_t { // R/F = NAME or NAME = R/F uint8 registerIndex; - uint8 copyWidth; uint32 name; - uint8 flags; }op_r_name; struct { @@ -298,7 +296,7 @@ struct PPCRecImlInstruction_t struct { uint32 jumpmarkAddress; - bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress + bool jumpAccordingToSegment; //IMLSegment* destinationSegment; // if set, this replaces jumpmarkAddress uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? 
-> Cleanup) uint8 crRegisterIndex; uint8 crBitIndex; @@ -311,7 +309,6 @@ struct PPCRecImlInstruction_t uint8 registerMem2; uint8 registerGQR; uint8 copyWidth; - //uint8 flags; struct { bool swapEndian : 1; @@ -322,20 +319,6 @@ struct PPCRecImlInstruction_t sint32 immS32; }op_storeLoad; struct - { - struct - { - uint8 registerMem; - sint32 immS32; - }src; - struct - { - uint8 registerMem; - sint32 immS32; - }dst; - uint8 copyWidth; - }op_mem2mem; - struct { uint8 registerResult; uint8 registerOperand; @@ -359,7 +342,6 @@ struct PPCRecImlInstruction_t struct { uint8 registerResult; - //uint8 flags; }op_fpr_r; struct { @@ -384,4 +366,30 @@ struct PPCRecImlInstruction_t bool bitMustBeSet; }op_conditional_r_s32; }; + + // instruction setters + void make_jumpmark(uint32 address) + { + type = PPCREC_IML_TYPE_JUMPMARK; + op_jumpmark.address = address; + } + + void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) + { + type = PPCREC_IML_TYPE_MACRO; + operation = macroId; + op_macro.param = param; + op_macro.param2 = param2; + op_macro.paramU16 = paramU16; + } + + void make_ppcEnter(uint32 ppcAddress) + { + type = PPCREC_IML_TYPE_PPC_ENTER; + operation = 0; + op_ppcEnter.ppcAddress = ppcAddress; + op_ppcEnter.x64Offset = 0; + associatedPPCAddress = 0; + } + }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index a9a734270..f95aa159f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,6 +1,6 @@ #pragma once -struct PPCRecImlSegment_t +struct IMLSegment { sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) sint32 startOffset{}; // offset to first instruction in iml instruction list @@ -9,13 +9,13 @@ struct PPCRecImlSegment_t uint32 x64Offset{}; // x64 code offset of segment start uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly) // list of intermediate instructions in this segment - std::vector imlList; + std::vector imlList; // segment link - PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch - PPCRecImlSegment_t* nextSegmentBranchTaken{}; + IMLSegment* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch + IMLSegment* nextSegmentBranchTaken{}; bool nextSegmentIsUncertain{}; sint32 loopDepth{}; - std::vector list_prevSegments{}; + std::vector list_prevSegments{}; // PPC range of segment uint32 ppcAddrMin{}; uint32 ppcAddrMax{}; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 78d8fad99..09f10956a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -173,7 +173,7 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // collect list of PPC-->x64 entry points entryPointsOut.clear(); - for(PPCRecImlSegment_t* imlSegment : ppcImlGenContext.segmentList2) + for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) { if (imlSegment->isEnterable == false) continue; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index d5d8bead8..bf774384d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -31,7 +31,7 @@ 
typedef struct typedef struct _ppcRecompilerSegmentPoint_t { sint32 index; - struct PPCRecImlSegment_t* imlSegment; + struct IMLSegment* imlSegment; _ppcRecompilerSegmentPoint_t* next; _ppcRecompilerSegmentPoint_t* prev; }ppcRecompilerSegmentPoint_t; @@ -57,7 +57,7 @@ struct raLivenessSubrangeLink_t struct raLivenessSubrange_t { struct raLivenessRange_t* range; - PPCRecImlSegment_t* imlSegment; + IMLSegment* imlSegment; ppcRecompilerSegmentPoint_t start; ppcRecompilerSegmentPoint_t end; // dirty state tracking @@ -107,6 +107,8 @@ struct PPCRecVGPRDistances_t #include "Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h" +struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct ppcImlGenContext_t* ppcImlGenContext); + struct ppcImlGenContext_t { PPCRecFunction_t* functionRef; @@ -122,14 +124,11 @@ struct ppcImlGenContext_t // temporary floating point registers (single and double precision) uint32 mappedFPRRegister[256]; // list of intermediate instructions - PPCRecImlInstruction_t* imlList; + IMLInstruction* imlList; sint32 imlListSize; sint32 imlListCount; // list of segments - //PPCRecImlSegment_t** segmentList; - //sint32 segmentListSize; - //sint32 segmentListCount; - std::vector segmentList2; + std::vector segmentList2; // code generation control bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode // register allocator info @@ -142,6 +141,12 @@ struct ppcImlGenContext_t { bool modifiesGQR[8]; }tracking; + + // append raw instruction + IMLInstruction& emitInst() + { + return *PPCRecompilerImlGen_generateNewEmptyInstruction(this); + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 41c1bf38a..e06bf6cfa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -4,13 +4,13 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses); void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor -PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount); -PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index); +IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount); +IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 index); void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count); -void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index); +void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, IMLSegment* imlSegment, sint32 index); void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint); // GPR register management @@ -22,20 +22,20 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void 
PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress); -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction); +void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress); +void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode); -void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); -void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0); +void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); +void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0); // IML instruction generation (new style, can generate new instructions but also overwrite existing ones) -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction); +void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); +void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); // IML generation - FPU bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); @@ -113,15 +113,15 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general -bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml); +bool PPCRecompiler_isSuffixInstruction(IMLInstruction* iml); void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst); -void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst); -void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew); -void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst); +void PPCRecompilerIml_setLinkBranchNotTaken(IMLSegment* 
imlSegmentSrc, IMLSegment* imlSegmentDst); +void PPCRecompilerIml_setLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); +void PPCRecompilerIML_relinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew); +void PPCRecompilerIML_removeLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); -PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment); +IMLInstruction* PPCRecompilerIML_getLastInstruction(IMLSegment* imlSegment); // IML analyzer typedef struct @@ -130,9 +130,9 @@ typedef struct uint32 writtenCRBits; }PPCRecCRTracking_t; -bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment); -bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking); +bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(IMLSegment* imlSegment); +bool PPCRecompilerImlAnalyzer_canTypeWriteCR(IMLInstruction* imlInstruction); +void PPCRecompilerImlAnalyzer_getCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking); // IML optimizer bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); @@ -153,7 +153,7 @@ void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcIml // debug -void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); +void PPCRecompiler_dumpIMLSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); typedef struct @@ -185,4 +185,4 @@ typedef struct }; }PPCImlOptimizerUsedRegisters_t; -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed); +void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const IMLInstruction* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp index 3ffe5aed1..8c976e8f2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp @@ -6,14 +6,14 @@ /* * Initializes a single segment and returns true if it is a finite loop */ -bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) +bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(IMLSegment* imlSegment) { bool isTightFiniteLoop = false; // base criteria, must jump to beginning of same segment if (imlSegment->nextSegmentBranchTaken != imlSegment) return false; // loops using BDNZ are assumed to always be finite - for(const PPCRecImlInstruction_t& instIt : imlSegment->imlList) + for(const IMLInstruction& instIt : imlSegment->imlList) { if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB && instIt.crRegister == 8) { @@ -24,7 +24,7 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) // risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB) // this catches most loops with load-update and store-update instructions, but also those with decrementing counters FixedSizeList list_modifiedRegisters; - for (const PPCRecImlInstruction_t& instIt : 
imlSegment->imlList) + for (const IMLInstruction& instIt : imlSegment->imlList) { if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB) ) { @@ -36,7 +36,7 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) // remove all registers from the list that are modified by non-ADD/SUB instructions // todo: We should also cover the case where ADD+SUB on the same register cancel the effect out PPCImlOptimizerUsedRegisters_t registersUsed; - for (const PPCRecImlInstruction_t& instIt : imlSegment->imlList) + for (const IMLInstruction& instIt : imlSegment->imlList) { if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB)) continue; @@ -56,7 +56,7 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) /* * Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister) */ -bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerImlAnalyzer_canTypeWriteCR(IMLInstruction* imlInstruction) { if (imlInstruction->type == PPCREC_IML_TYPE_R_R) return true; @@ -77,7 +77,7 @@ bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruct return false; } -void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking) +void PPCRecompilerImlAnalyzer_getCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking) { crTracking->readCRBits = 0; crTracking->writtenCRBits = 0; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 791f32cfb..9d2cef0c4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -10,57 +10,57 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset); -PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) +IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) { if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) { sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; - ppcImlGenContext->imlList = (PPCRecImlInstruction_t*)realloc(ppcImlGenContext->imlList, sizeof(PPCRecImlInstruction_t)*newSize); + ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); ppcImlGenContext->imlListSize = newSize; } - PPCRecImlInstruction_t* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; - memset(imlInstruction, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; + memset(imlInstruction, 0x00, sizeof(IMLInstruction)); imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; ppcImlGenContext->imlListCount++; return imlInstruction; } - -void PPCRecompilerImlGen_generateNewInstruction_jumpmark(ppcImlGenContext_t* ppcImlGenContext, uint32 address) -{ - // no-op that indicates possible destination of a 
jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_JUMPMARK; - imlInstruction->op_jumpmark.address = address; -} - -void PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext_t* ppcImlGenContext, uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) -{ - // no-op that indicates possible destination of a jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_MACRO; - imlInstruction->operation = macroId; - imlInstruction->op_macro.param = param; - imlInstruction->op_macro.param2 = param2; - imlInstruction->op_macro.paramU16 = paramU16; -} - -/* - * Generates a marker for Interpreter -> Recompiler entrypoints - * PPC_ENTER iml instructions have no associated PPC address but the instruction itself has one - */ -void PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcAddress) -{ - // no-op that indicates possible destination of a jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_PPC_ENTER; - imlInstruction->operation = 0; - imlInstruction->op_ppcEnter.ppcAddress = ppcAddress; - imlInstruction->op_ppcEnter.x64Offset = 0; - imlInstruction->associatedPPCAddress = 0; -} - -void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) +// +//void PPCRecompilerImlGen_generateNewInstruction_jumpmark(ppcImlGenContext_t* ppcImlGenContext, uint32 address) +//{ +// // no-op that indicates possible destination of a jump +// IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); +// imlInstruction->type = PPCREC_IML_TYPE_JUMPMARK; +// imlInstruction->op_jumpmark.address = address; +//} +// +//void PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext_t* ppcImlGenContext, uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) +//{ +// // no-op that indicates possible destination of a jump +// IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); +// imlInstruction->type = PPCREC_IML_TYPE_MACRO; +// imlInstruction->operation = macroId; +// imlInstruction->op_macro.param = param; +// imlInstruction->op_macro.param2 = param2; +// imlInstruction->op_macro.paramU16 = paramU16; +//} + +///* +// * Generates a marker for Interpreter -> Recompiler entrypoints +// * PPC_ENTER iml instructions have no associated PPC address but the instruction itself has one +// */ +//void PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcAddress) +//{ +// // no-op that indicates possible destination of a jump +// IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); +// imlInstruction->type = PPCREC_IML_TYPE_PPC_ENTER; +// imlInstruction->operation = 0; +// imlInstruction->op_ppcEnter.ppcAddress = ppcAddress; +// imlInstruction->op_ppcEnter.x64Offset = 0; +// imlInstruction->associatedPPCAddress = 0; +//} + +void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, 
uint8 crMode) { // operation with two register operands (e.g. "t0 = t1") if(imlInstruction == NULL) @@ -76,7 +76,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) { // operation with three register operands (e.g. "t0 = t1 + t4") - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_R_R_R; imlInstruction->operation = operation; imlInstruction->crRegister = crRegister; @@ -89,7 +89,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcIml void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) { // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234") - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_R_R_S32; imlInstruction->operation = operation; imlInstruction->crRegister = crRegister; @@ -99,16 +99,14 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcI imlInstruction->op_r_r_s32.immS32 = immS32; } -void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name, uint32 copyWidth, bool signExtend, bool bigEndian) +void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name) { // Store name (e.g. "'r3' = t0" which translates to MOV [ESP+offset_r3], reg32) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_NAME_R; imlInstruction->operation = operation; imlInstruction->op_r_name.registerIndex = registerIndex; imlInstruction->op_r_name.name = name; - imlInstruction->op_r_name.copyWidth = copyWidth; - imlInstruction->op_r_name.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(bigEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); } void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode) @@ -116,7 +114,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcIml // two variations: // operation without store (e.g. "'r3' < 123" which has no effect other than updating a condition flags register) // operation with store (e.g. 
"'r3' = 123") - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_R_S32; imlInstruction->operation = operation; imlInstruction->crRegister = crRegister; @@ -125,12 +123,12 @@ void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcIml imlInstruction->op_r_immS32.immS32 = immS32; } -void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) +void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { if(imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); else - memset(imlInstruction, 0, sizeof(PPCRecImlInstruction_t)); + memset(imlInstruction, 0, sizeof(IMLInstruction)); imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32; imlInstruction->operation = operation; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; @@ -144,13 +142,13 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte } -void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress) +void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress) { // jump if (imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); else - memset(imlInstruction, 0, sizeof(PPCRecImlInstruction_t)); + memset(imlInstruction, 0, sizeof(IMLInstruction)); imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; @@ -162,7 +160,7 @@ void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlG } // jump based on segment branches -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) { // jump if (imlInstruction == NULL) @@ -178,7 +176,7 @@ void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* imlInstruction->op_conditionalJump.bitMustBeSet = false; } -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) { if (imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -193,7 +191,7 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen // multiple variations: // operation involving only one cr bit (like clear crD bit) // operation involving three cr bits (like crD = crA or crB) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + 
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_CR; imlInstruction->operation = operation; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; @@ -206,7 +204,7 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { // conditional jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; @@ -219,7 +217,7 @@ void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_LOAD; imlInstruction->operation = 0; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; @@ -235,7 +233,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppc void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED; imlInstruction->operation = 0; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; @@ -251,7 +249,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) { // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_STORE; imlInstruction->operation = 0; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; @@ -267,7 +265,7 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppc void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = 
PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED; imlInstruction->operation = 0; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; @@ -400,10 +398,7 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { -//#ifdef CEMU_DEBUG_ASSERT -// PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); -//#endif - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); + PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); } bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -465,7 +460,7 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { // TBL / TBU uint32 param2 = spr | (rD << 16); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0); return true; } return false; @@ -636,8 +631,8 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) return true; } // generate funtion call instructions - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); return true; } // is jump destination within recompiled function? 
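
Note on the new call-site pattern used throughout this patch: the make_macro()/make_ppcEnter() members invoked via ppcImlGenContext->emitInst() are not defined in these hunks (they are presumably added to IMLInstruction elsewhere in the series). Based on the commented-out free emitters they replace, a plausible sketch of what each call site relies on is shown below; field names are taken from the old helpers and the real definitions may differ.

	void IMLInstruction::make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16)
	{
		// emitInst() hands out a zero-initialized instruction with crRegister already set to
		// PPC_REC_INVALID_REGISTER and associatedPPCAddress filled in, so only the
		// macro-specific fields need to be written here
		type = PPCREC_IML_TYPE_MACRO;
		operation = macroId;
		op_macro.param = param;
		op_macro.param2 = param2;
		op_macro.paramU16 = paramU16;
	}

	void IMLInstruction::make_ppcEnter(uint32 ppcAddress)
	{
		// marker for Interpreter -> Recompiler entry points; mirrors the removed
		// PPCRecompilerImlGen_generateNewInstruction_ppcEnter() helper, which cleared
		// associatedPPCAddress because PPC_ENTER markers carry their own address
		type = PPCREC_IML_TYPE_PPC_ENTER;
		operation = 0;
		op_ppcEnter.ppcAddress = ppcAddress;
		op_ppcEnter.x64Offset = 0;
		associatedPPCAddress = 0;
	}

With helpers of this shape, a call such as ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ...) is equivalent to the removed PPCRecompilerImlGen_generateNewInstruction_macro() path, just with allocation and initialization centralized in emitInst().
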
@@ -649,7 +644,7 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) else { // todo: Inline this jump destination if possible (in many cases it's a bunch of GPR/FPR store instructions + BLR) - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); } return true; } @@ -702,10 +697,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } // generate instruction - //PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); + //ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); return true; } return false; @@ -766,8 +761,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { // far jump PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } } } @@ -803,13 +798,13 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // store LR if( saveLR ) { - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); } else { // branch always, no condition and 
no decrementer - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } else @@ -845,7 +840,7 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod } // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } return true; @@ -884,13 +879,13 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); } else { // branch always, no condition and no decrementer - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } else @@ -926,7 +921,7 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco } // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } return true; @@ -1575,7 +1570,7 @@ void PPCRecompilerImlGen_LWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( rA == 0 ) { // special form where gpr is ignored and only imm is used - 
PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1596,7 +1591,7 @@ void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1619,7 +1614,7 @@ void PPCRecompilerImlGen_LHA(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1640,7 +1635,7 @@ void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1664,7 +1659,7 @@ void PPCRecompilerImlGen_LHZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { // special form where gpr is ignored and only imm is used // note: Darksiders 2 has this instruction form but it is never executed. 
- PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1685,7 +1680,7 @@ void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1708,7 +1703,7 @@ void PPCRecompilerImlGen_LBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1729,7 +1724,7 @@ void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -1815,7 +1810,7 @@ bool PPCRecompilerImlGen_LHAX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return true; } // load memory rA and rB into register @@ -1837,7 +1832,7 @@ bool PPCRecompilerImlGen_LHAUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 
ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return true; } // load memory rA and rB into register @@ -1861,7 +1856,7 @@ bool PPCRecompilerImlGen_LHZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return true; } // load memory rA and rB into register @@ -1883,7 +1878,7 @@ bool PPCRecompilerImlGen_LHZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return true; } // load memory rA and rB into register @@ -1947,7 +1942,7 @@ bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if (rA == 0) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return true; } // load memory rA and rB into register @@ -2015,7 +2010,7 @@ void PPCRecompilerImlGen_STW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { // special form where gpr is ignored and only imm is used // note: Darksiders 2 has this instruction form but it is never executed. 
- //PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + //ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -2034,7 +2029,7 @@ void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // store&update instructions where rD==rA store the register contents without added imm, therefore we need to handle it differently @@ -2060,7 +2055,7 @@ void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -2079,7 +2074,7 @@ void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // get memory gpr register @@ -2104,7 +2099,7 @@ void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // load memory gpr into register @@ -2123,7 +2118,7 @@ void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rA == 0 ) { // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, 
PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); return; } // get memory gpr register @@ -2944,7 +2939,7 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_HLE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { uint32 hleFuncId = opcode&0xFFFF; - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0); return true; } @@ -2975,7 +2970,7 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext char _tempOpcodename[32]; -const char* PPCRecompiler_getOpcodeDebugName(const PPCRecImlInstruction_t* iml) +const char* PPCRecompiler_getOpcodeDebugName(const IMLInstruction* iml) { uint32 op = iml->operation; if (op == PPCREC_IML_OP_ASSIGN) @@ -3031,7 +3026,7 @@ void PPCRecDebug_addS32Param(StringBuf& strOutput, sint32 val, bool isLast = fal strOutput.addFmt("0x{:08x}, ", val); } -void PPCRecompilerDebug_printLivenessRangeInfo(StringBuf& currentLineText, PPCRecImlSegment_t* imlSegment, sint32 offset) +void PPCRecompilerDebug_printLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset) { // pad to 70 characters sint32 index = currentLineText.getLen(); @@ -3072,7 +3067,7 @@ void PPCRecompilerDebug_printLivenessRangeInfo(StringBuf& currentLineText, PPCRe } } -void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) +void PPCRecompiler_dumpIMLSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) { StringBuf strOutput(1024); @@ -3102,7 +3097,7 @@ void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segment for(sint32 i=0; iimlList.size(); i++) { - const PPCRecImlInstruction_t& inst = imlSegment->imlList[i]; + const IMLInstruction& inst = imlSegment->imlList[i]; // don't log NOP instructions unless they have an associated PPC address if(inst.type == PPCREC_IML_TYPE_NO_OP && inst.associatedPPCAddress == MPTR_NULL) continue; @@ -3439,7 +3434,7 @@ void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* } } -void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index) +void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, IMLSegment* imlSegment, sint32 index) { segmentPoint->imlSegment = imlSegment; segmentPoint->index = index; @@ -3464,13 +3459,13 @@ void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoi * Insert multiple no-op instructions * Warning: Can invalidate any previous instruction structs from the same segment */ -void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount) +void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount) { cemu_assert_debug(index >= 0 && index <= imlSegment->imlList.size()); imlSegment->imlList.insert(imlSegment->imlList.begin() + index, shiftBackCount, {}); - 
memset(imlSegment->imlList.data() + index, 0, sizeof(PPCRecImlInstruction_t) * shiftBackCount); + memset(imlSegment->imlList.data() + index, 0, sizeof(IMLInstruction) * shiftBackCount); // fill empty space with NOP instructions for (sint32 i = 0; i < shiftBackCount; i++) @@ -3495,23 +3490,23 @@ void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint3 } } -PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index) +IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 index) { PPCRecompiler_pushBackIMLInstructions(imlSegment, index, 1); return imlSegment->imlList.data() + index; } -PPCRecImlInstruction_t* PPCRecompiler_appendInstruction(PPCRecImlSegment_t* imlSegment) +IMLInstruction* PPCRecompiler_appendInstruction(IMLSegment* imlSegment) { size_t index = imlSegment->imlList.size(); imlSegment->imlList.emplace_back(); - memset(imlSegment->imlList.data() + index, 0, sizeof(PPCRecImlInstruction_t)); + memset(imlSegment->imlList.data() + index, 0, sizeof(IMLInstruction)); return imlSegment->imlList.data() + index; } -PPCRecImlSegment_t* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext) +IMLSegment* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext) { - PPCRecImlSegment_t* segment = new PPCRecImlSegment_t(); + IMLSegment* segment = new IMLSegment(); ppcImlGenContext->segmentList2.emplace_back(segment); return segment; } @@ -3520,7 +3515,7 @@ void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint3 { ppcImlGenContext->segmentList2.insert(ppcImlGenContext->segmentList2.begin() + index, count, nullptr); for (sint32 i = 0; i < count; i++) - ppcImlGenContext->segmentList2[index + i] = new PPCRecImlSegment_t(); + ppcImlGenContext->segmentList2[index + i] = new IMLSegment(); } void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) @@ -3531,7 +3526,7 @@ void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) ppcImlGenContext->imlList = nullptr; } - for (PPCRecImlSegment_t* imlSegment : ppcImlGenContext->segmentList2) + for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2) { //free(imlSegment->imlList); delete imlSegment; @@ -3551,7 +3546,7 @@ void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) //} } -bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml) +bool PPCRecompiler_isSuffixInstruction(IMLInstruction* iml) { if (iml->type == PPCREC_IML_TYPE_MACRO && (iml->operation == PPCREC_IML_MACRO_BLR || iml->operation == PPCREC_IML_MACRO_BCTR) || iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_BL || @@ -4438,12 +4433,11 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; ppcImlGenContext.cyclesSinceLastBranch++; - PPCRecompilerImlGen_generateNewInstruction_jumpmark(&ppcImlGenContext, addressOfCurrentInstruction); - + ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) { // add PPCEnter for addresses that are in entryAddresses - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(&ppcImlGenContext, addressOfCurrentInstruction); + ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); } else if(ppcImlGenContext.currentInstruction != 
firstCurrentInstruction) { @@ -4465,7 +4459,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext canInlineFunction = true; } if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(&ppcImlGenContext, addressOfCurrentInstruction); + ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); } if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) { @@ -4474,7 +4468,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) { // after unconditional BCTR instruction - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(&ppcImlGenContext, addressOfCurrentInstruction); + ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); } } } @@ -4498,7 +4492,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // optimize unused jumpmarks away // first, flag all jumpmarks as unused - std::map map_jumpMarks; + std::map map_jumpMarks; for(sint32 i=0; istartOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart+1; ppcRecSegment->ppcAddress = 0xFFFFFFFF; @@ -4574,7 +4568,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // segment ends before current instruction if( segmentImlIndex > segmentStart ) { - PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); + IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart; ppcRecSegment->ppcAddress = 0xFFFFFFFF; @@ -4586,26 +4580,20 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if( segmentImlIndex != segmentStart ) { // final segment - PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); + IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart; ppcRecSegment->ppcAddress = 0xFFFFFFFF; segmentStart = segmentImlIndex; } // move iml instructions into the segments - for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { uint32 imlStartIndex = segIt->startOffset; uint32 imlCount = segIt->count; if( imlCount > 0 ) { - //segIt->imlListSize = imlCount + 4; - //segIt->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t) * segIt->imlListSize); - //segIt->imlListCount = imlCount; - //memcpy(segIt->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); cemu_assert_debug(segIt->imlList.empty()); - //segIt->imlList.resize(imlCount); - //segIt->imlList.insert(segIt->imlList.begin() + imlStartIndex, ); segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); } @@ -4613,9 +4601,6 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext { // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code cemu_assert_debug(segIt->imlList.empty()); - //segIt->imlList = nullptr; - //segIt->imlListSize = 0; - //segIt->imlListCount = 0; } segIt->startOffset = 9999999; segIt->count = 9999999; @@ -4625,7 +4610,7 @@ bool 
PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext ppcImlGenContext.imlList = nullptr; ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) - for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { uint32 segmentPPCAddrMin = 0xFFFFFFFF; uint32 segmentPPCAddrMax = 0x00000000; @@ -4652,7 +4637,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // certain instructions can change the segment state // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) // jumpmarks mark the segment as a jump destination (within the same function) - for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { while (segIt->imlList.size() > 0) { @@ -4692,15 +4677,15 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIML_linkSegments(&ppcImlGenContext); // optimization pass - replace segments with conditional MOVs if possible - for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) continue; // not a branching segment - PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(segIt); + IMLInstruction* lastInstruction = PPCRecompilerIML_getLastInstruction(segIt); if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) continue; - PPCRecImlSegment_t* conditionalSegment = segIt->nextSegmentBranchNotTaken; - PPCRecImlSegment_t* finalSegment = segIt->nextSegmentBranchTaken; + IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; + IMLSegment* finalSegment = segIt->nextSegmentBranchTaken; if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) continue; if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) @@ -4713,7 +4698,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext bool canReduceSegment = true; for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) { - PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList.data() + f; + IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; if( imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) continue; // todo: Register to register copy @@ -4734,7 +4719,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // append conditional moves based on branch condition for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) { - PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList.data() + f; + IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, 
branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); else @@ -4756,13 +4741,13 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIML_removeLink(segIt, finalSegment); if (finalSegment->nextSegmentBranchNotTaken) { - PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchNotTaken; + IMLSegment* tempSegment = finalSegment->nextSegmentBranchNotTaken; PPCRecompilerIML_removeLink(finalSegment, tempSegment); PPCRecompilerIml_setLinkBranchNotTaken(segIt, tempSegment); } if (finalSegment->nextSegmentBranchTaken) { - PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchTaken; + IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; PPCRecompilerIML_removeLink(finalSegment, tempSegment); PPCRecompilerIml_setLinkBranchTaken(segIt, tempSegment); } @@ -4770,7 +4755,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext cemu_assert_debug(segIt != finalSegment); for (sint32 f = 0; f < finalSegment->imlList.size(); f++) { - memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(PPCRecImlInstruction_t)); + memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); } finalSegment->imlList.clear(); @@ -4781,7 +4766,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert cycle counter instruction in every segment that has a cycle count greater zero - for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { if( segIt->ppcAddrMin == 0 ) continue; @@ -4817,7 +4802,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext for(size_t s=0; sppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) 
- PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; if( imlSegment->imlList.empty() ) continue; if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) @@ -4839,12 +4824,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); imlSegment = NULL; - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList2[s+0]; - PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList2[s+1]; - PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList2[s+2]; + IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s+0]; + IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s+1]; + IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s+2]; // create entry point segment PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); - PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1]; + IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1]; // relink segments PPCRecompilerIML_relinkInputSegment(imlSegmentP2, imlSegmentP0); PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); @@ -4920,7 +4905,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert name store instructions at the end of each segment but before branch instructions - for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { if(segIt->imlList.size() == 0 ) continue; // ignore empty segments diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index 1efc41b89..fddc5293b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -6,7 +6,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; @@ -21,7 +21,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; @@ -37,7 +37,7 @@ void 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenCo void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // store to memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; @@ -52,7 +52,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = 0) { // store to memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; @@ -68,7 +68,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenCo void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER) { // fpr OP fpr - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R; imlInstruction->operation = operation; imlInstruction->op_fpr_r_r.registerResult = registerResult; @@ -80,7 +80,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcI void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperand1, uint8 registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) { // fpr = OP (fpr,fpr) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R_R; imlInstruction->operation = operation; imlInstruction->op_fpr_r_r_r.registerResult = registerResult; @@ -93,7 +93,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* pp void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperandA, uint8 registerOperandB, uint8 registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) { // fpr = OP (fpr,fpr,fpr) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R_R_R; imlInstruction->operation = operation; imlInstruction->op_fpr_r_r_r_r.registerResult = registerResult; @@ -104,7 
+104,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* imlInstruction->op_fpr_r_r_r_r.flags = 0; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister) +void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister) { // OP (fpr) if(imlInstruction == NULL) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp index 30b76e6a8..4ea28062b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp @@ -3,7 +3,7 @@ #include "PPCRecompilerIml.h" #include "PPCRecompilerX64.h" -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed) +void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const IMLInstruction* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed) { registersUsed->readNamedReg1 = -1; registersUsed->readNamedReg2 = -1; @@ -433,7 +433,7 @@ sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) return reg; } -void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) +void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) { if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME) { @@ -608,7 +608,7 @@ void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGen } } -void PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) +void PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) { if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME) { @@ -726,7 +726,7 @@ void PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGen } } -void PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 fprRegisterSearched, sint32 fprRegisterReplaced) +void PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 fprRegisterSearched, sint32 fprRegisterReplaced) { if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) { @@ -891,7 +891,7 @@ sint32 PPCRecompiler_getNextRegisterToReplace(PPCImlOptimizerUsedRegisters_t* re return gprToReplace; } -bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed) +bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed) { 
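// A hedged reading of this deprecated helper ("Depr"): starting at imlIndexStart it checks
// which virtual registers the current instruction touches and reports one that can be
// repurposed, together with the PPC name currently bound to it and whether that binding is
// still live (*isUsed), so the caller knows whether a spill/reload pair must be emitted
// first. The core of such a search is just "first index not referenced right now"; a
// standalone sketch with hypothetical names:
//
//   sint32 findFirstFreeRegister(const bool inUse[], sint32 count)
//   {
//       for (sint32 i = 0; i < count; i++)
//           if (!inUse[i])
//               return i;   // not referenced by the instruction under inspection
//       return -1;          // nothing free; the caller has to evict a register instead
//   }
//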
PPCImlOptimizerUsedRegisters_t registersUsed; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, &imlSegment->imlList[imlIndexStart], ®istersUsed); @@ -942,11 +942,11 @@ bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContex } -bool PPCRecompiler_hasSuffixInstruction(PPCRecImlSegment_t* imlSegment) +bool PPCRecompiler_hasSuffixInstruction(IMLSegment* imlSegment) { if (imlSegment->imlList.empty()) return false; - const PPCRecImlInstruction_t& imlInstruction = imlSegment->imlList.back(); + const IMLInstruction& imlInstruction = imlSegment->imlList.back(); if( imlInstruction.type == PPCREC_IML_TYPE_MACRO && (imlInstruction.operation == PPCREC_IML_MACRO_BLR || imlInstruction.operation == PPCREC_IML_MACRO_BCTR) || imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_BL || imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_B_FAR || @@ -962,35 +962,31 @@ bool PPCRecompiler_hasSuffixInstruction(PPCRecImlSegment_t* imlSegment) return false; } -void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, replacedRegisterTracker_t* replacedRegisterTracker, sint32 registerTrackerIndex, sint32* imlIndex) +void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, replacedRegisterTracker_t* replacedRegisterTracker, sint32 registerTrackerIndex, sint32* imlIndex) { // store register sint32 imlIndexEdit = *imlIndex; PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); // name_unusedRegister = unusedRegister - PPCRecImlInstruction_t& imlInstructionItr = imlSegment->imlList[imlIndexEdit + 0]; - memset(&imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit + 0]; + memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr.type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerNewName; - imlInstructionItr.op_r_name.copyWidth = 32; - imlInstructionItr.op_r_name.flags = 0; imlIndexEdit++; // load new register if required if( replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].nameMustBeMaintained ) { PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - PPCRecImlInstruction_t& imlInstructionItr = imlSegment->imlList[imlIndexEdit]; - memset(&imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit]; + memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr.type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerPreviousName;//ppcImlGenContext->mappedRegister[replacedRegisterTracker.replacedRegisterEntry[i].index]; - imlInstructionItr.op_r_name.copyWidth = 32; - imlInstructionItr.op_r_name.flags = 0; imlIndexEdit += 1; } // move last entry to current one @@ -1006,12 
+1002,12 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // inefficient algorithm for optimizing away excess registers // we simply load, use and store excess registers into other unused registers when we need to // first we remove all name load and store instructions that involve out-of-bounds registers - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { size_t imlIndex = 0; while( imlIndex < segIt->imlList.size() ) { - PPCRecImlInstruction_t& imlInstructionItr = segIt->imlList[imlIndex]; + IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) { if( imlInstructionItr.op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) @@ -1025,7 +1021,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte } } // replace registers - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { size_t imlIndex = 0; while( imlIndex < segIt->imlList.size() ) @@ -1085,47 +1081,39 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // add load/store before current instruction PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); // name_unusedRegister = unusedRegister - PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); if( replacedRegisterIsUsed ) { imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; } else imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; // name_gprToReplace = unusedRegister imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; // unusedRegister = name_unusedRegister imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); if( replacedRegisterIsUsed ) { 
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; } else imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; @@ -1208,13 +1196,13 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon ppcRecManageRegisters_t rCtx = { 0 }; for (sint32 i = 0; i < 64; i++) rCtx.ppcRegToMapping[i] = -1; - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; size_t idx = 0; sint32 currentUseIndex = 0; PPCImlOptimizerUsedRegisters_t registersUsed; while (idx < imlSegment->imlList.size()) { - PPCRecImlInstruction_t& idxInst = imlSegment->imlList[idx]; + IMLInstruction& idxInst = imlSegment->imlList[idx]; if ( PPCRecompiler_isSuffixInstruction(&idxInst) ) break; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, &idxInst, ®istersUsed); @@ -1264,14 +1252,12 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); // create unload instruction PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; + memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; imlInstructionTemp->op_r_name.registerIndex = (uint8)(unloadRegMapping - rCtx.currentMapping); imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; - imlInstructionTemp->op_r_name.copyWidth = 32; - imlInstructionTemp->op_r_name.flags = 0; idx++; // update mapping unloadRegMapping->isActive = false; @@ -1282,14 +1268,12 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon } // create load instruction PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; + memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; imlInstructionTemp->op_r_name.registerIndex = (uint8)(regMapping-rCtx.currentMapping); imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr]; - imlInstructionTemp->op_r_name.copyWidth = 32; - imlInstructionTemp->op_r_name.flags = 0; idx++; // update mapping regMapping->virtualReg = virtualFpr; @@ -1344,14 +1328,12 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon { if (rCtx.currentMapping[i].isActive == false) continue; - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; + memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = 
PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; imlInstructionTemp->op_r_name.registerIndex = i; imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; - imlInstructionTemp->op_r_name.copyWidth = 32; - imlInstructionTemp->op_r_name.flags = 0; idx++; } } @@ -1372,12 +1354,12 @@ bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) /* * Returns true if the loaded value is guaranteed to be overwritten */ -bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) +bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; for(size_t i=startIndex; iimlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; //nameStoreInstruction->op_r_name.registerIndex PPCImlOptimizerUsedRegisters_t registersUsed; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); @@ -1393,12 +1375,12 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG /* * Returns true if the loaded value is guaranteed to be overwritten */ -bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) +bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; for(size_t i=startIndex; iimlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; PPCImlOptimizerUsedRegisters_t registersUsed; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if( registersUsed.readFPR1 == registerIndex || registersUsed.readFPR2 == registerIndex || registersUsed.readFPR3 == registerIndex || registersUsed.readFPR4 == registerIndex) @@ -1413,12 +1395,12 @@ bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcI /* * Returns true if the loaded name is never changed */ -bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) +bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; for(sint32 i=startIndex; i>=0; i--) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; PPCImlOptimizerUsedRegisters_t registersUsed; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if( registersUsed.writtenNamedReg1 == registerIndex ) @@ -1436,12 +1418,12 @@ sint32 debugCallCounter1 = 0; /* * 
Returns true if the name is overwritten in the current or any following segments */ -bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) +bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { uint32 name = nameStoreInstruction->op_r_name.name; for(size_t i=startIndex; iimlList.size(); i++) { - const PPCRecImlInstruction_t& imlInstruction = imlSegment->imlList[i]; + const IMLInstruction& imlInstruction = imlSegment->imlList[i]; if(imlInstruction.type == PPCREC_IML_TYPE_R_NAME ) { // name is loaded before being written @@ -1472,12 +1454,12 @@ bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcI /* * Returns true if the loaded FPR name is never changed */ -bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) +bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; for(sint32 i=startIndex; i>=0; i--) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; PPCImlOptimizerUsedRegisters_t registersUsed; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if( registersUsed.writtenFPR1 == registerIndex ) @@ -1491,7 +1473,7 @@ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppc return false; } -uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, uint32 currentOverwriteMask, uint32 currentReadMask, uint32 scanDepth) +uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, uint32 currentOverwriteMask, uint32 currentReadMask, uint32 scanDepth) { // is any bit overwritten but not read? 
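// "Overwritten" here means the segment writes a CR bit without reading it first, so whatever
// value the bit had on entry is dead and upstream updates of it may be dropped. The mask is
// plain bit arithmetic on the per-segment crBitsWritten/crBitsInput fields; a minimal
// self-contained sketch of the same computation (illustrative names, compiles on its own):
//
//   constexpr uint32 overwrittenBits(uint32 bitsWritten, uint32 bitsReadFirst)
//   {
//       return bitsWritten & ~bitsReadFirst;
//   }
//   // e.g. written = 0b1100, read-before-write = 0b0100  ->  overwritten = 0b1000
//   static_assert(overwrittenBits(0b1100u, 0b0100u) == 0b1000u);
//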
uint32 overwriteMask = imlSegment->crBitsWritten&~imlSegment->crBitsInput; @@ -1527,7 +1509,7 @@ uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, P * Returns a mask of all CR bits that are overwritten (written but not read) in the segment and all it's following segments * If the write state of a CR bit cannot be determined, it is returned as 0 (not overwritten) */ -uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { if (imlSegment->nextSegmentIsUncertain) { @@ -1553,9 +1535,9 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PP void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) { - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { - for(PPCRecImlInstruction_t& instIt : segIt->imlList) + for(IMLInstruction& instIt : segIt->imlList) { if (instIt.type == PPCREC_IML_TYPE_CJUMP) { @@ -1615,9 +1597,9 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext } } // flag instructions that write to CR where we can ignore individual CR bits - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { - for (PPCRecImlInstruction_t& instIt : segIt->imlList) + for (IMLInstruction& instIt : segIt->imlList) { if( PPCRecompilerImlAnalyzer_canTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7 ) { @@ -1630,12 +1612,12 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext } } -bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) +bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) { PPCImlOptimizerUsedRegisters_t registersUsed; for (sint32 i = startIndex; i <= endIndex; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); if (registersUsed.writtenNamedReg1 == vreg) return true; @@ -1643,11 +1625,11 @@ bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenCont return false; } -sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* startSegment, sint32 startIndex, sint32 name) +sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name) { // current segment sint32 currentIndex = startIndex; - PPCRecImlSegment_t* currentSegment = startSegment; + IMLSegment* currentSegment = startSegment; sint32 segmentIterateCount = 0; sint32 foundRegister = -1; while (true) @@ -1709,9 +1691,9 @@ sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcIml return -1; } -void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) +void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) { 
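// Forward peephole scan: when a single-precision load (mode SINGLE_INTO_PS0_PS1, expanded to
// double on load) is only stored back as single precision within the scan window and before
// the next suffix instruction, the float->double expand and double->float compress cancel
// out, so both sides are flagged notExpanded and just move the raw 32-bit value. An explicit
// PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 is then inserted after the last
// such store so later uses still see a proper double. Hedged before/after example, with
// memory operands abbreviated:
//
//   before:  FPR_LOAD  fN, [rA+d]   mode=SINGLE_INTO_PS0_PS1   ; expand on load
//            FPR_STORE fN, [rB+d2]  (single-precision store)   ; compress on store
//   after:   FPR_LOAD  fN, [rA+d]   notExpanded=1              ; raw 32-bit copy
//            FPR_STORE fN, [rB+d2]  notExpanded=1
//            FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 fN      ; restore double view
//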
- PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; + IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; if (imlInstructionLoad->op_storeLoad.flags2.notExpanded) return; @@ -1721,7 +1703,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI sint32 lastStore = -1; for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; if (PPCRecompiler_isSuffixInstruction(imlInstruction)) { break; @@ -1739,7 +1721,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI imlInstructionLoad->op_storeLoad.flags2.notExpanded = true; } // also set the flag for the store instruction - PPCRecImlInstruction_t* imlInstructionStore = imlInstruction; + IMLInstruction* imlInstructionStore = imlInstruction; imlInstructionStore->op_storeLoad.flags2.notExpanded = true; foundMatch = true; @@ -1766,7 +1748,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI if (foundMatch) { // insert expand instruction after store - PPCRecImlInstruction_t* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore); + IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore); PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, fprIndex); } } @@ -1784,11 +1766,11 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI */ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) { - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { for (sint32 i = 0; i < segIt->imlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList.data() + i; + IMLInstruction* imlInstruction = segIt->imlList.data() + i; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) { PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); @@ -1801,9 +1783,9 @@ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContex } } -void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexLoad, sint32 gprIndex) +void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 gprIndex) { - PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; + IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false ) return; bool foundMatch = false; @@ -1812,7 +1794,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp sint32 i = imlIndexLoad + 1; for (; i < scanRangeEnd; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; if (PPCRecompiler_isSuffixInstruction(imlInstruction)) { break; @@ -1824,7 +1806,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp break; if (imlInstruction->op_storeLoad.registerData == gprIndex) { - 
PPCRecImlInstruction_t* imlInstructionStore = imlInstruction; + IMLInstruction* imlInstructionStore = imlInstruction; if (foundMatch == false) { // switch the endian swap flag for the load instruction @@ -1851,7 +1833,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp if (foundMatch) { // insert expand instruction - PPCRecImlInstruction_t* newExpand = PPCRecompiler_insertInstruction(imlSegment, i); + IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, i); PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex); } } @@ -1868,11 +1850,11 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp */ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) { - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { for (sint32 i = 0; i < segIt->imlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList.data() + i; + IMLInstruction* imlInstruction = segIt->imlList.data() + i; if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian ) { PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); @@ -1915,9 +1897,9 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 */ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) { - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { - for(PPCRecImlInstruction_t& instIt : segIt->imlList) + for(IMLInstruction& instIt : segIt->imlList) { if (instIt.type == PPCREC_IML_TYPE_FPR_LOAD || instIt.type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { @@ -2031,9 +2013,9 @@ bool PPCRecompilerAnalyzer_checkForGPROverwrite(PPCImlOptimizerUsedRegisters_t* return false; } -void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) +void _reorderConditionModifyInstructions(IMLSegment* imlSegment) { - PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment); + IMLInstruction* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment); // last instruction a conditional branch? 
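// This pass tries to move the instruction that computes the branch condition (the CR setter)
// so it sits directly in front of the conditional jump, presumably so the x64 backend can
// emit a cmp/test + jcc pair without preserving flags across unrelated instructions. In the
// code below the setter is re-emitted just after the last "unsafe" instruction and the
// original copy is turned into a no-op; the net effect is the same as rotating it towards
// the end of the segment. Standalone sketch of that reordering on a plain vector
// (illustrative only, assumes idx < seq.size() - 1):
//
//   #include <algorithm>
//   #include <vector>
//   void moveJustBeforeLast(std::vector<int>& seq, size_t idx)
//   {
//       // the element at idx ends up immediately before the final element (the "jump")
//       std::rotate(seq.begin() + idx, seq.begin() + idx + 1, seq.end() - 1);
//   }
//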
if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) return; @@ -2049,7 +2031,7 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) sint32 unsafeInstructionIndex = -1; for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + i; + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; PPCRecompilerImlAnalyzer_getCRTracking(imlInstruction, &crTracking); if (crTracking.readCRBits != 0) return; // dont handle complex cases for now @@ -2127,8 +2109,8 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) assert_dbg(); #endif - PPCRecImlInstruction_t* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); - memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); + memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); PPCRecompilerImlGen_generateNewInstruction_noOp(nullptr, imlSegment->imlList.data() + crSetterInstructionIndex); } @@ -2139,7 +2121,7 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext) { // check if this segment has a conditional branch - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { _reorderConditionModifyInstructions(segIt); } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp index d31c02d4b..176f2034d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp @@ -70,7 +70,7 @@ raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext return livenessRange; } -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex) +raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex) { raLivenessSubrange_t* livenessSubrange = memPool_livenessSubrange.acquireObj(); livenessSubrange->list_locations.resize(0); @@ -95,7 +95,7 @@ raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenConte void _unlinkSubrange(raLivenessSubrange_t* subrange) { - PPCRecImlSegment_t* imlSegment = subrange->imlSegment; + IMLSegment* imlSegment = subrange->imlSegment; PPCRecRARange_removeLink_perVirtualGPR(&imlSegment->raInfo.linkedList_perVirtualGPR[subrange->range->virtualRegister], subrange); PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange); } @@ -306,7 +306,7 @@ void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 subrange->list_locations.emplace_back(index, isRead, isWrite); } -sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment) +sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment) { sint32 v = imlSegment->loopDepth + 1; v *= 5; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h 
b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h index 01970bbf3..28fbe9063 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h @@ -1,7 +1,7 @@ #pragma once raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name); -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex); +raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex); void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange); void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); @@ -17,7 +17,7 @@ void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange); // cost estimation -sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment); +sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment); sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range); sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range); sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp index 8d41ad377..3d4546c1f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp @@ -3,9 +3,9 @@ #include "PPCRecompilerX64.h" #include "PPCRecompilerImlRanges.h" -void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); +void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); -bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml); +bool PPCRecompiler_isSuffixInstruction(IMLInstruction* iml); uint32 recRACurrentIterationIndex = 0; @@ -15,7 +15,7 @@ uint32 PPCRecRA_getNextIterationIndex() return recRACurrentIterationIndex; } -bool _detectLoop(PPCRecImlSegment_t* currentSegment, sint32 depth, uint32 iterationIndex, PPCRecImlSegment_t* imlSegmentLoopBase) +bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase) { if (currentSegment == imlSegmentLoopBase) return true; @@ -47,7 +47,7 @@ bool _detectLoop(PPCRecImlSegment_t* currentSegment, sint32 depth, uint32 iterat return currentSegment->raInfo.isPartOfProcessedLoop; } -void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegmentLoopBase) +void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase) { uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex; @@ -57,7 +57,7 @@ void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_ } } -void 
PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { if (imlSegment->nextSegmentIsUncertain) return; @@ -120,62 +120,54 @@ typedef struct uint16 registerName; }raLoadStoreInfo_t; -void PPCRecRA_insertGPRLoadInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) +void PPCRecRA_insertGPRLoadInstruction(IMLSegment* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = registerIndex; imlInstructionItr->op_r_name.name = registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; } -void PPCRecRA_insertGPRLoadInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* loadList, sint32 loadCount) +void PPCRecRA_insertGPRLoadInstructions(IMLSegment* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* loadList, sint32 loadCount) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadCount); - memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*loadCount); + memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(IMLInstruction)*loadCount); for (sint32 i = 0; i < loadCount; i++) { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); + IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = (uint8)loadList[i].registerIndex; imlInstructionItr->op_r_name.name = (uint32)loadList[i].registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; } } -void PPCRecRA_insertGPRStoreInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) +void PPCRecRA_insertGPRStoreInstruction(IMLSegment* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = registerIndex; imlInstructionItr->op_r_name.name = registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; } -void PPCRecRA_insertGPRStoreInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* storeList, sint32 storeCount) +void PPCRecRA_insertGPRStoreInstructions(IMLSegment* imlSegment, sint32 insertIndex, 
raLoadStoreInfo_t* storeList, sint32 storeCount) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeCount); - memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*storeCount); + memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(IMLInstruction)*storeCount); for (sint32 i = 0; i < storeCount; i++) { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); + IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; imlInstructionItr->op_r_name.registerIndex = (uint8)storeList[i].registerIndex; imlInstructionItr->op_r_name.name = (uint32)storeList[i].registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; } } @@ -192,7 +184,7 @@ sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessSubrange_t* subrange, si } // count how many instructions there are until physRegister is used by any subrange (returns 0 if register is in use at startIndex, and INT_MAX if not used for the remainder of the segment) -sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 physRegister) +sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, sint32 startIndex, sint32 physRegister) { sint32 minDistance = INT_MAX; // next @@ -227,7 +219,7 @@ uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range) uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1; for (auto& subrange : range->list_subranges) { - PPCRecImlSegment_t* imlSegment = subrange->imlSegment; + IMLSegment* imlSegment = subrange->imlSegment; raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { @@ -254,7 +246,7 @@ uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range) bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; } -void _sortSegmentAllSubrangesLinkedList(PPCRecImlSegment_t* imlSegment) +void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) { raLivenessSubrange_t* subrangeList[4096+1]; sint32 count = 0; @@ -318,7 +310,7 @@ void _sortSegmentAllSubrangesLinkedList(PPCRecImlSegment_t* imlSegment) #endif } -bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // sort subranges ascending by start index @@ -628,7 +620,7 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) { // start with frequently executed segments first sint32 maxLoopDepth = 0; - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth); } @@ -637,7 +629,7 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) bool done = false; for (sint32 d = maxLoopDepth; d >= 0; d--) { - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { if (segIt->loopDepth != d) continue; @@ -672,7 +664,7 @@ void 
_findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationI subrange->lastIterationIndex = iterationIndex; if (subrange->hasStoreDelayed) return; // no need to traverse this subrange - PPCRecImlSegment_t* imlSegment = subrange->imlSegment; + IMLSegment* imlSegment = subrange->imlSegment; if (subrange->end.index != RA_INTER_RANGE_END) { // ending segment @@ -758,7 +750,7 @@ void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange) } } -void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { sint16 virtualReg2PhysReg[PPC_REC_MAX_VIRTUAL_GPR]; for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) @@ -933,7 +925,7 @@ void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) { for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment); } } @@ -949,7 +941,7 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen size_t segmentIndex = 0; while (segmentIndex < ppcImlGenContext->segmentList2.size()) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; if (imlSegment->nextSegmentIsUncertain) { segmentIndex++; @@ -971,9 +963,9 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen continue; } PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1); - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0]; - PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1]; - PPCRecImlSegment_t* nextSegment = imlSegment->nextSegmentBranchNotTaken; + IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0]; + IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1]; + IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken; PPCRecompilerIML_removeLink(imlSegmentP0, nextSegment); PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP1, nextSegment); PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); @@ -982,12 +974,12 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen // detect loops for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; imlSegment->momentaryIndex = s; } for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment); } } @@ -1010,12 +1002,12 @@ void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext) } -bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR) +bool _isRangeDefined(IMLSegment* imlSegment, sint32 vGPR) { return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX); } -void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { for (sint32 i = 
0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) { @@ -1049,13 +1041,13 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) { // for each register calculate min/max index of usage range within each segment - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, segIt); } } -raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range) +raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) { if (imlSegment->raDistances.isProcessed[vGPR]) { @@ -1094,7 +1086,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG return subrange; } -void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) { @@ -1146,7 +1138,7 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, } } -void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) +void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR) { if (_isRangeDefined(imlSegment, vGPR) == false) { @@ -1157,7 +1149,7 @@ void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PP imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; } -void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) +void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR) { if (_isRangeDefined(imlSegment, vGPR) == false) { @@ -1175,7 +1167,7 @@ void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenConte } } -void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth) +void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, IMLSegment** route, sint32 routeDepth) { #ifdef CEMU_DEBUG_ASSERT if (routeDepth < 2) @@ -1193,7 +1185,7 @@ void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth - 1], vGPR); } -void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth) +void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* currentSegment, sint32 vGPR, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) { if (routeDepth >= 64) { @@ -1229,7 +1221,7 @@ void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCR } } -void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR) +void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* currentSegment, sint32 vGPR) { #ifdef CEMU_DEBUG_ASSERT if 
(currentSegment->raDistances.reg[vGPR].usageEnd < 0) @@ -1253,7 +1245,7 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRe return; // can't reach end // also dont forget: Extending is easier if we allow 'non symmetric' branches. E.g. register range one enters one branch - PPCRecImlSegment_t* route[64]; + IMLSegment* route[64]; route[0] = currentSegment; if (currentSegment->nextSegmentBranchNotTaken) { @@ -1265,7 +1257,7 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRe } } -void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries { @@ -1282,16 +1274,16 @@ void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, #endif } -void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - std::vector list_segments; + std::vector list_segments; list_segments.reserve(1000); sint32 index = 0; imlSegment->raRangeExtendProcessed = true; list_segments.push_back(imlSegment); while (index < list_segments.size()) { - PPCRecImlSegment_t* currentSegment = list_segments[index]; + IMLSegment* currentSegment = list_segments[index]; PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); // follow flow if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) @@ -1312,7 +1304,7 @@ void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) { for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; if (imlSegment->list_prevSegments.empty()) { if (imlSegment->raRangeExtendProcessed) @@ -1326,7 +1318,7 @@ void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) { for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; auto localLoopDepth = imlSegment->loopDepth; if (localLoopDepth <= 0) continue; // not inside a loop @@ -1365,7 +1357,7 @@ void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcIml // calculate liveness ranges for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index c8b5a827a..3d6013adb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -1,9 +1,9 @@ #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" -PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) +IMLSegment* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* 
ppcImlGenContext, uint32 ppcOffset) { - for(PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for(IMLSegment* segIt : ppcImlGenContext->segmentList2) { if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) { @@ -14,7 +14,7 @@ PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* return NULL; } -void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst) +void PPCRecompilerIml_setLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) { // make sure segments aren't already linked if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) @@ -27,7 +27,7 @@ void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, P imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); } -void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst) +void PPCRecompilerIml_setLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) { // make sure segments aren't already linked if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) @@ -40,7 +40,7 @@ void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCR imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); } -void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst) +void PPCRecompilerIML_removeLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) { if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) { @@ -70,11 +70,11 @@ void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSeg /* * Replaces all links to segment orig with linkts to segment new */ -void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew) +void PPCRecompilerIML_relinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew) { while (imlSegmentOrig->list_prevSegments.size() != 0) { - PPCRecImlSegment_t* prevSegment = imlSegmentOrig->list_prevSegments[0]; + IMLSegment* prevSegment = imlSegmentOrig->list_prevSegments[0]; if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig) { PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig); @@ -97,10 +97,10 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) size_t segCount = ppcImlGenContext->segmentList2.size(); for(size_t s=0; ssegmentList2[s]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); - PPCRecImlSegment_t* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; + IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; // handle empty segment if( imlSegment->imlList.empty()) { @@ -111,11 +111,11 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) continue; } // check last instruction of segment - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); + IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) { // find destination segment by ppc jump address - PPCRecImlSegment_t* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); + IMLSegment* jumpDestSegment = 
PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); if( jumpDestSegment ) { if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) @@ -145,12 +145,12 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont size_t initialSegmentCount = ppcImlGenContext->segmentList2.size(); for (size_t i = 0; i < initialSegmentCount; i++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[i]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[i]; if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) { // spawn new segment at end PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentList2.size(), 1); - PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList2[ppcImlGenContext->segmentList2.size()-1]; + IMLSegment* entrySegment = ppcImlGenContext->segmentList2[ppcImlGenContext->segmentList2.size()-1]; entrySegment->isEnterable = true; entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction @@ -164,7 +164,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont } } -PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment) +IMLInstruction* PPCRecompilerIML_getLastInstruction(IMLSegment* imlSegment) { if (imlSegment->imlList.empty()) return nullptr; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp index 00cc8cdee..21edc8106 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp @@ -79,7 +79,7 @@ void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, si } } -void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { sint32 crRegister = imlInstruction->crRegister; if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) @@ -124,7 +124,7 @@ void ATTR_MS_ABI PPCRecompiler_getTBU(PPCInterpreter_t* hCPU, uint32 gprIndex) hCPU->gpr[gprIndex] = (uint32)((coreTime>>32)&0xFFFFFFFF); } -bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BLRL ) @@ -344,7 +344,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, /* * Load from memory */ -bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) +bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, 
bool indexed) { sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); @@ -502,7 +502,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p /* * Write to memory */ -bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) +bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); @@ -675,7 +675,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, return false; } -bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) { @@ -989,7 +989,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp return true; } -bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { @@ -1140,7 +1140,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, return true; } -bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) { @@ -1221,7 +1221,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR return false; } -bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if( imlInstruction->operation == PPCREC_IML_OP_ADD || imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) { @@ -1791,7 +1791,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, return true; } -bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* 
imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { @@ -1981,7 +1981,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction return true; } -bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlSegment_t* imlSegment, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) { @@ -2102,7 +2102,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec return true; } -bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // some tests (all performed on a i7-4790K) @@ -2119,7 +2119,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction /* * PPC condition register operation */ -bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // while these instruction do not directly affect eflags, they change the CR bit if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR) @@ -2161,7 +2161,7 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc } -void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { imlInstruction->op_ppcEnter.x64Offset = x64GenContext->codeBufferIndex; // generate code @@ -2182,7 +2182,7 @@ void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunctio } } -void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) @@ -2211,7 +2211,7 @@ void 
PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, assert_dbg(); } -void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) @@ -2278,12 +2278,12 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo // generate iml instruction code bool codeGenerationFailed = false; - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { segIt->x64Offset = x64GenContext.codeBufferIndex; for(size_t i=0; iimlList.size(); i++) { - PPCRecImlInstruction_t* imlInstruction = segIt->imlList.data() + i; + IMLInstruction* imlInstruction = segIt->imlList.data() + i; if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) { @@ -2477,7 +2477,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo uint32 x64Offset = 0xFFFFFFFF; if (x64GenContext.relocateOffsetTable[i].type == X64_RELOC_LINK_TO_PPC) { - for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) { @@ -2494,7 +2494,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo } else { - PPCRecImlSegment_t* destSegment = (PPCRecImlSegment_t*)x64GenContext.relocateOffsetTable[i].extraInfo; + IMLSegment* destSegment = (IMLSegment*)x64GenContext.relocateOffsetTable[i].extraInfo; x64Offset = destSegment->x64Offset; } uint32 relocBase = x64GenContext.relocateOffsetTable[i].offset; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h index 1d37a77e1..3df2b7619 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h @@ -1,12 +1,12 @@ -typedef struct +struct x64RelocEntry_t { uint32 offset; uint8 type; void* extraInfo; -}x64RelocEntry_t; +}; -typedef struct +struct x64GenContext_t { uint8* codeBuffer; sint32 codeBufferIndex; @@ -18,7 +18,7 @@ typedef struct x64RelocEntry_t* relocateOffsetTable; sint32 relocateOffsetTableSize; sint32 relocateOffsetTableCount; -}x64GenContext_t; +}; // Some of these are defined by winnt.h and gnu headers #undef REG_EAX @@ -140,15 +140,15 @@ void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, si void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions(); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed); -bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, 
ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed); +bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); // ASM gen void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp index d83f67dec..618c51a28 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp @@ -4,7 +4,7 @@ #include "asm/x64util.h" #include "Common/cpu_features.h" -void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) @@ -21,7 +21,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct } } -void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, 
ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) @@ -264,7 +264,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen } // load from memory -bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) +bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); @@ -591,7 +591,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe } // store to memory -bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) +bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); @@ -727,7 +727,7 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg) } // FPR op FPR -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) @@ -1006,7 +1006,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction /* * FPR = op (fprA, fprB) */ -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); @@ -1099,7 +1099,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti /* * FPR = op (fprA, fprB, fprC) */ -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, 
ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) @@ -1193,7 +1193,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc /* * Single FPR operation */ -void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) From 625874a7534b7e639d6a10508cfa93bea7ee36ac Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 5 Nov 2022 05:06:21 +0100 Subject: [PATCH 06/64] PPCRec: Move debug printing + smaller clean up --- src/Cafe/CMakeLists.txt | 2 + src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 4 + .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 471 +++++++++++++++ .../Espresso/Recompiler/IML/IMLInstruction.h | 17 + .../HW/Espresso/Recompiler/IML/IMLSegment.cpp | 10 + .../HW/Espresso/Recompiler/IML/IMLSegment.h | 5 + .../HW/Espresso/Recompiler/PPCRecompiler.h | 12 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 6 - .../Recompiler/PPCRecompilerImlGen.cpp | 534 ------------------ .../Recompiler/PPCRecompilerImlOptimizer.cpp | 31 +- .../PPCRecompilerImlRegisterAllocator.cpp | 8 +- .../Espresso/Recompiler/PPCRecompilerX64.cpp | 2 - 12 files changed, 520 insertions(+), 582 deletions(-) create mode 100644 src/Cafe/HW/Espresso/Recompiler/IML/IML.h create mode 100644 src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 6cb13acb2..91badaf53 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -67,10 +67,12 @@ add_library(CemuCafe HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h HW/Espresso/Recompiler/PPCRecompiler.cpp HW/Espresso/Recompiler/PPCRecompiler.h + HW/Espresso/Recompiler/IML/IML.h HW/Espresso/Recompiler/IML/IMLSegment.cpp HW/Espresso/Recompiler/IML/IMLSegment.h HW/Espresso/Recompiler/IML/IMLInstruction.cpp HW/Espresso/Recompiler/IML/IMLInstruction.h + HW/Espresso/Recompiler/IML/IMLDebug.cpp HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h new file mode 100644 index 000000000..6f1030870 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -0,0 +1,4 @@ + +// debug +void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); +void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp new file mode 100644 index 000000000..b8094bb8e --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -0,0 +1,471 @@ +#include "IML.h" +#include "IMLInstruction.h" +#include "IMLSegment.h" +#include "util/helpers/StringBuf.h" + +#include "Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h" + +const char* 
IMLDebug_GetOpcodeName(const IMLInstruction* iml) +{ + static char _tempOpcodename[32]; + uint32 op = iml->operation; + if (op == PPCREC_IML_OP_ASSIGN) + return "MOV"; + else if (op == PPCREC_IML_OP_ADD) + return "ADD"; + else if (op == PPCREC_IML_OP_SUB) + return "SUB"; + else if (op == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY) + return "ADDCSC"; + else if (op == PPCREC_IML_OP_OR) + return "OR"; + else if (op == PPCREC_IML_OP_AND) + return "AND"; + else if (op == PPCREC_IML_OP_XOR) + return "XOR"; + else if (op == PPCREC_IML_OP_LEFT_SHIFT) + return "LSH"; + else if (op == PPCREC_IML_OP_RIGHT_SHIFT) + return "RSH"; + else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED) + return "MULS"; + else if (op == PPCREC_IML_OP_DIVIDE_SIGNED) + return "DIVS"; + + sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type); + return _tempOpcodename; +} + +void IMLDebug_AppendRegisterParam(StringBuf& strOutput, sint32 virtualRegister, bool isLast = false) +{ + if (isLast) + { + if (virtualRegister < 10) + strOutput.addFmt("t{} ", virtualRegister); + else + strOutput.addFmt("t{}", virtualRegister); + return; + } + if (virtualRegister < 10) + strOutput.addFmt("t{} , ", virtualRegister); + else + strOutput.addFmt("t{}, ", virtualRegister); +} + +void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) +{ + if (isLast) + { + strOutput.addFmt("0x{:08x}", val); + return; + } + strOutput.addFmt("0x{:08x}, ", val); +} + +void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset) +{ + // pad to 70 characters + sint32 index = currentLineText.getLen(); + while (index < 70) + { + debug_printf(" "); + index++; + } + raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + if (offset == subrangeItr->start.index) + { + if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index) + { + debug_printf("*%-2d", subrangeItr->range->virtualRegister); + } + else + { + debug_printf("|%-2d", subrangeItr->range->virtualRegister); + } + } + else if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index ) + { + debug_printf("* "); + } + else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index) + { + debug_printf("| "); + } + else + { + debug_printf(" "); + } + index += 3; + // next + subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + } +} + +void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) +{ + StringBuf strOutput(1024); + + strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, imlSegment->ppcAddrMin, imlSegment->ppcAddrMax, imlSegment->loopDepth); + if (imlSegment->isEnterable) + { + strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); + } + else if (imlSegment->isJumpDestination) + { + strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); + } + + debug_printf("%s\n", strOutput.c_str()); + + strOutput.reset(); + strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); + debug_printf("%s", strOutput.c_str()); + + if (printLivenessRangeInfo) + { + IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); + } + debug_printf("\n"); + + sint32 lineOffsetParameters = 18; + + for (sint32 i = 0; i < imlSegment->imlList.size(); i++) + { + const IMLInstruction& inst = imlSegment->imlList[i]; + // don't log NOP instructions unless they have an associated PPC address + 
if (inst.type == PPCREC_IML_TYPE_NO_OP && inst.associatedPPCAddress == MPTR_NULL) + continue; + strOutput.reset(); + strOutput.addFmt("{:08x} ", inst.associatedPPCAddress); + if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) + { + if (inst.type == PPCREC_IML_TYPE_R_NAME) + strOutput.add("LD_NAME"); + else + strOutput.add("ST_NAME"); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.registerIndex); + + strOutput.addFmt("name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name); + if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) + { + strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); + } + else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) + { + strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); + } + else + strOutput.add("ukn"); + strOutput.add(")"); + } + else if (inst.type == PPCREC_IML_TYPE_R_R) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerResult); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerA, true); + + if (inst.crRegister != PPC_REC_INVALID_REGISTER) + { + strOutput.addFmt(" -> CR{}", inst.crRegister); + } + } + else if (inst.type == PPCREC_IML_TYPE_R_R_R) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerResult); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerB, true); + if (inst.crRegister != PPC_REC_INVALID_REGISTER) + { + strOutput.addFmt(" -> CR{}", inst.crRegister); + } + } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerResult); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); + + if (inst.crRegister != PPC_REC_INVALID_REGISTER) + { + strOutput.addFmt(" -> CR{}", inst.crRegister); + } + } + else if (inst.type == PPCREC_IML_TYPE_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.registerIndex); + IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); + + if (inst.crRegister != PPC_REC_INVALID_REGISTER) + { + strOutput.addFmt(" -> CR{}", inst.crRegister); + } + } + else if (inst.type == PPCREC_IML_TYPE_JUMPMARK) + { + strOutput.addFmt("jm_{:08x}:", inst.op_jumpmark.address); + } + else if (inst.type == PPCREC_IML_TYPE_PPC_ENTER) + { + strOutput.addFmt("ppcEnter_{:08x}:", inst.op_ppcEnter.ppcAddress); + } + else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || + inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + { + if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) + strOutput.add("LD_"); + else + strOutput.add("ST_"); + + if 
(inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); + + if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); + else + strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); + } + else if (inst.type == PPCREC_IML_TYPE_CJUMP) + { + if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) + strOutput.add("JE"); + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) + strOutput.add("JNE"); + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) + strOutput.add("JG"); + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) + strOutput.add("JGE"); + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) + strOutput.add("JL"); + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) + strOutput.add("JLE"); + else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE) + strOutput.add("JALW"); // jump always + else + cemu_assert_unimplemented(); + strOutput.addFmt(" jm_{:08x} (cr{})", inst.op_conditionalJump.jumpmarkAddress, inst.crRegister); + } + else if (inst.type == PPCREC_IML_TYPE_NO_OP) + { + strOutput.add("NOP"); + } + else if (inst.type == PPCREC_IML_TYPE_MACRO) + { + if (inst.operation == PPCREC_IML_MACRO_BLR) + { + strOutput.addFmt("MACRO BLR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_BLRL) + { + strOutput.addFmt("MACRO BLRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_BCTR) + { + strOutput.addFmt("MACRO BCTR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_BCTRL) + { + strOutput.addFmt("MACRO BCTRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_BL) + { + strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_B_FAR) + { + strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_LEAVE) + { + strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); + } + else if (inst.operation == PPCREC_IML_MACRO_HLE) + { + strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); + } + else if (inst.operation == PPCREC_IML_MACRO_MFTB) + { + strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); + } + else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES) + { + strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); + } + else + { + strOutput.addFmt("MACRO ukn operation {}", inst.operation); + } + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_NAME) + { + strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.registerIndex, 
inst.op_r_name.name); + if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) + { + strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); + } + else if (inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0 + 999)) + { + strOutput.addFmt("tempFpr{}", inst.op_r_name.name - PPCREC_NAME_TEMPORARY_FPR0); + } + else + strOutput.add("ukn"); + strOutput.add(")"); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_NAME_R) + { + strOutput.addFmt("name_{} (", inst.op_r_name.name); + if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) + { + strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); + } + else if (inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0 + 999)) + { + strOutput.addFmt("tempFpr{}", inst.op_r_name.name - PPCREC_NAME_TEMPORARY_FPR0); + } + else + strOutput.add("ukn"); + strOutput.addFmt(") = fpr_t{}", inst.op_r_name.registerIndex); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) + { + strOutput.addFmt("fpr_t{} = ", inst.op_storeLoad.registerData); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{} [t{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32, inst.op_storeLoad.mode); + if (inst.op_storeLoad.flags2.notExpanded) + { + strOutput.addFmt(" "); + } + } + else if (inst.type == PPCREC_IML_TYPE_FPR_STORE) + { + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); + strOutput.addFmt("= fpr_t{} mode {}\n", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) + { + strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) + { + strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); + } + else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + { + strOutput.addFmt("t{} ", inst.op_conditional_r_s32.registerIndex); + bool displayAsHex = false; + if (inst.operation == PPCREC_IML_OP_ASSIGN) + { + displayAsHex = true; + strOutput.add("="); + } + else + strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); + if (displayAsHex) + strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); + else + strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); + strOutput.add(" (conditional)"); + if (inst.crRegister != PPC_REC_INVALID_REGISTER) + { + strOutput.addFmt(" -> and update CR{}", inst.crRegister); + } + } + 
else + { + strOutput.addFmt("Unknown iml type {}", inst.type); + } + debug_printf("%s", strOutput.c_str()); + if (printLivenessRangeInfo) + { + IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i); + } + debug_printf("\n"); + } + // all ranges + if (printLivenessRangeInfo) + { + debug_printf("Ranges-VirtReg "); + raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + debug_printf("v%-2d", subrangeItr->range->virtualRegister); + subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + } + debug_printf("\n"); + debug_printf("Ranges-PhysReg "); + subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + debug_printf("p%-2d", subrangeItr->range->physicalRegister); + subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + } + debug_printf("\n"); + } + // branch info + debug_printf("Links from: "); + for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++) + { + if (i) + debug_printf(", "); + debug_printf("%p", (void*)imlSegment->list_prevSegments[i]); + } + debug_printf("\n"); + debug_printf("Links to: "); + if (imlSegment->nextSegmentBranchNotTaken) + debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken); + if (imlSegment->nextSegmentBranchTaken) + debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken); + debug_printf("\n"); +} + +void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext) +{ + for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) + { + IMLDebug_DumpSegment(ppcImlGenContext->segmentList2[i], i); + debug_printf("\n"); + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index e92fc6110..34733c4f9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -367,6 +367,23 @@ struct IMLInstruction }op_conditional_r_s32; }; + bool IsSuffixInstruction() const + { + if (type == PPCREC_IML_TYPE_MACRO && (operation == PPCREC_IML_MACRO_BLR || operation == PPCREC_IML_MACRO_BCTR) || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BL || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_FAR || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BLRL || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BCTRL || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || + type == PPCREC_IML_TYPE_PPC_ENTER || + type == PPCREC_IML_TYPE_CJUMP || + type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + return true; + return false; + } + // instruction setters void make_jumpmark(uint32 address) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp index e69de29bb..e7eb3b323 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ -0,0 +1,10 @@ +#include "IMLInstruction.h" +#include "IMLSegment.h" + +bool IMLSegment::HasSuffixInstruction() const +{ + if (imlList.empty()) + return false; + const IMLInstruction& imlInstruction = imlList.back(); + return imlInstruction.IsSuffixInstruction(); +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index f95aa159f..216e17488 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,4 +1,7 @@ #pragma once +#include "IMLInstruction.h" + +#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h" // remove once dependency is gone struct IMLSegment { @@ -37,4 +40,6 @@ struct IMLSegment bool raRangeExtendProcessed{}; // segment points ppcRecompilerSegmentPoint_t* segmentPointList{}; + + bool HasSuffixInstruction() const; }; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index bf774384d..88bd1d946 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -1,4 +1,4 @@ -#include +#pragma once #define PPC_REC_CODE_AREA_START (0x00000000) // lower bound of executable memory area. Recompiler expects this address to be 0 #define PPC_REC_CODE_AREA_END (0x10000000) // upper bound of executable memory area @@ -8,23 +8,21 @@ #define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) -typedef struct +struct ppcRecRange_t { uint32 ppcAddress; uint32 ppcSize; - //void* x86Start; - //size_t x86Size; void* storedRange; -}ppcRecRange_t; +}; -typedef struct +struct PPCRecFunction_t { uint32 ppcAddress; uint32 ppcSize; // ppc code size of function void* x86Code; // pointer to x86 code size_t x86Size; std::vector list_ranges; -}PPCRecFunction_t; +}; #include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h" diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index e06bf6cfa..7ee5dffc0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -113,7 +113,6 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general -bool PPCRecompiler_isSuffixInstruction(IMLInstruction* iml); void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompilerIml_setLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); void PPCRecompilerIml_setLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); @@ -151,11 +150,6 @@ void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext); // late optimizations void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext); -// debug - -void PPCRecompiler_dumpIMLSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); - - typedef struct { union diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 9d2cef0c4..7fdbff171 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -4,7 +4,6 @@ #include "PPCRecompilerIml.h" #include "PPCRecompilerX64.h" #include "PPCRecompilerImlRanges.h" -#include "util/helpers/StringBuf.h" bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); @@ -25,40 +24,6 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext ppcImlGenContext->imlListCount++; return imlInstruction; } -// -//void PPCRecompilerImlGen_generateNewInstruction_jumpmark(ppcImlGenContext_t* ppcImlGenContext, uint32 address) -//{ -// // no-op that indicates possible destination of a jump -// 
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); -// imlInstruction->type = PPCREC_IML_TYPE_JUMPMARK; -// imlInstruction->op_jumpmark.address = address; -//} -// -//void PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext_t* ppcImlGenContext, uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) -//{ -// // no-op that indicates possible destination of a jump -// IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); -// imlInstruction->type = PPCREC_IML_TYPE_MACRO; -// imlInstruction->operation = macroId; -// imlInstruction->op_macro.param = param; -// imlInstruction->op_macro.param2 = param2; -// imlInstruction->op_macro.paramU16 = paramU16; -//} - -///* -// * Generates a marker for Interpreter -> Recompiler entrypoints -// * PPC_ENTER iml instructions have no associated PPC address but the instruction itself has one -// */ -//void PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcAddress) -//{ -// // no-op that indicates possible destination of a jump -// IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); -// imlInstruction->type = PPCREC_IML_TYPE_PPC_ENTER; -// imlInstruction->operation = 0; -// imlInstruction->op_ppcEnter.ppcAddress = ppcAddress; -// imlInstruction->op_ppcEnter.x64Offset = 0; -// imlInstruction->associatedPPCAddress = 0; -//} void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) { @@ -2968,472 +2933,6 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext return v; } -char _tempOpcodename[32]; - -const char* PPCRecompiler_getOpcodeDebugName(const IMLInstruction* iml) -{ - uint32 op = iml->operation; - if (op == PPCREC_IML_OP_ASSIGN) - return "MOV"; - else if (op == PPCREC_IML_OP_ADD) - return "ADD"; - else if (op == PPCREC_IML_OP_SUB) - return "SUB"; - else if (op == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY) - return "ADDCSC"; - else if (op == PPCREC_IML_OP_OR) - return "OR"; - else if (op == PPCREC_IML_OP_AND) - return "AND"; - else if (op == PPCREC_IML_OP_XOR) - return "XOR"; - else if (op == PPCREC_IML_OP_LEFT_SHIFT) - return "LSH"; - else if (op == PPCREC_IML_OP_RIGHT_SHIFT) - return "RSH"; - else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED) - return "MULS"; - else if (op == PPCREC_IML_OP_DIVIDE_SIGNED) - return "DIVS"; - - sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type); - return _tempOpcodename; -} - -void PPCRecDebug_addRegisterParam(StringBuf& strOutput, sint32 virtualRegister, bool isLast = false) -{ - if (isLast) - { - if (virtualRegister < 10) - strOutput.addFmt("t{} ", virtualRegister); - else - strOutput.addFmt("t{}", virtualRegister); - return; - } - if (virtualRegister < 10) - strOutput.addFmt("t{} , ", virtualRegister); - else - strOutput.addFmt("t{}, ", virtualRegister); -} - -void PPCRecDebug_addS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) -{ - if (isLast) - { - strOutput.addFmt("0x{:08x}", val); - return; - } - strOutput.addFmt("0x{:08x}, ", val); -} - -void PPCRecompilerDebug_printLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset) -{ - // pad to 70 characters - sint32 index = currentLineText.getLen(); - while (index < 70) - { - debug_printf(" "); - index++; - } - raLivenessSubrange_t* 
subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while (subrangeItr) - { - if (offset == subrangeItr->start.index) - { - if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index) - { - debug_printf("*%-2d", subrangeItr->range->virtualRegister); - } - else - { - debug_printf("|%-2d", subrangeItr->range->virtualRegister); - } - } - else if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index ) - { - debug_printf("* "); - } - else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index) - { - debug_printf("| "); - } - else - { - debug_printf(" "); - } - index += 3; - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } -} - -void PPCRecompiler_dumpIMLSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) -{ - StringBuf strOutput(1024); - - strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, imlSegment->ppcAddrMin, imlSegment->ppcAddrMax, imlSegment->loopDepth); - if (imlSegment->isEnterable) - { - strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); - } - else if( imlSegment->isJumpDestination ) - { - strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); - } - - debug_printf("%s\n", strOutput.c_str()); - - strOutput.reset(); - strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); - debug_printf("%s", strOutput.c_str()); - - if (printLivenessRangeInfo) - { - PPCRecompilerDebug_printLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); - } - debug_printf("\n"); - - sint32 lineOffsetParameters = 18; - - for(sint32 i=0; iimlList.size(); i++) - { - const IMLInstruction& inst = imlSegment->imlList[i]; - // don't log NOP instructions unless they have an associated PPC address - if(inst.type == PPCREC_IML_TYPE_NO_OP && inst.associatedPPCAddress == MPTR_NULL) - continue; - strOutput.reset(); - strOutput.addFmt("{:08x} ", inst.associatedPPCAddress); - if( inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) - { - if(inst.type == PPCREC_IML_TYPE_R_NAME) - strOutput.add("LD_NAME"); - else - strOutput.add("ST_NAME"); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_name.registerIndex); - - strOutput.addFmt("name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name); - if( inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0+999) ) - { - strOutput.addFmt("r{}", inst.op_r_name.name-PPCREC_NAME_R0); - } - else if( inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0+999) ) - { - strOutput.addFmt("spr{}", inst.op_r_name.name-PPCREC_NAME_SPR0); - } - else - strOutput.add("ukn"); - strOutput.add(")"); - } - else if( inst.type == PPCREC_IML_TYPE_R_R ) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r.registerResult); - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r.registerA, true); - - if( inst.crRegister != PPC_REC_INVALID_REGISTER ) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } - } - else if( inst.type == PPCREC_IML_TYPE_R_R_R ) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, 
inst.op_r_r_r.registerResult); - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerA); - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerB, true); - if( inst.crRegister != PPC_REC_INVALID_REGISTER ) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } - } - else if (inst.type == PPCREC_IML_TYPE_R_R_S32) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_s32.registerResult); - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_s32.registerA); - PPCRecDebug_addS32Param(strOutput, inst.op_r_r_s32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } - } - else if (inst.type == PPCREC_IML_TYPE_R_S32) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - PPCRecDebug_addRegisterParam(strOutput, inst.op_r_immS32.registerIndex); - PPCRecDebug_addS32Param(strOutput, inst.op_r_immS32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } - } - else if( inst.type == PPCREC_IML_TYPE_JUMPMARK ) - { - strOutput.addFmt("jm_{:08x}:", inst.op_jumpmark.address); - } - else if( inst.type == PPCREC_IML_TYPE_PPC_ENTER ) - { - strOutput.addFmt("ppcEnter_{:08x}:", inst.op_ppcEnter.ppcAddress); - } - else if(inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || - inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED ) - { - if(inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) - strOutput.add("LD_"); - else - strOutput.add("ST_"); - - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); - - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - PPCRecDebug_addRegisterParam(strOutput, inst.op_storeLoad.registerData); - - if(inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); - else - strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - } - else if( inst.type == PPCREC_IML_TYPE_CJUMP ) - { - if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) - strOutput.add("JE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) - strOutput.add("JNE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) - strOutput.add("JG"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) - strOutput.add("JGE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) - strOutput.add("JL"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) - strOutput.add("JLE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE) - strOutput.add("JALW"); // jump always - else - cemu_assert_unimplemented(); - strOutput.addFmt(" jm_{:08x} (cr{})", inst.op_conditionalJump.jumpmarkAddress, inst.crRegister); - } - else if( inst.type == PPCREC_IML_TYPE_NO_OP ) - { - strOutput.add("NOP"); - } - else if( inst.type == PPCREC_IML_TYPE_MACRO ) - { - if( inst.operation == PPCREC_IML_MACRO_BLR ) - { - strOutput.addFmt("MACRO BLR 
0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); - } - else if( inst.operation == PPCREC_IML_MACRO_BLRL ) - { - strOutput.addFmt("MACRO BLRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); - } - else if( inst.operation == PPCREC_IML_MACRO_BCTR ) - { - strOutput.addFmt("MACRO BCTR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); - } - else if( inst.operation == PPCREC_IML_MACRO_BCTRL ) - { - strOutput.addFmt("MACRO BCTRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); - } - else if( inst.operation == PPCREC_IML_MACRO_BL ) - { - strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); - } - else if( inst.operation == PPCREC_IML_MACRO_B_FAR ) - { - strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); - } - else if( inst.operation == PPCREC_IML_MACRO_LEAVE ) - { - strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); - } - else if( inst.operation == PPCREC_IML_MACRO_HLE ) - { - strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); - } - else if( inst.operation == PPCREC_IML_MACRO_MFTB ) - { - strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); - } - else if( inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES ) - { - strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); - } - else - { - strOutput.addFmt("MACRO ukn operation {}", inst.operation); - } - } - else if( inst.type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name); - if( inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0+999) ) - { - strOutput.addFmt("fpr{}", inst.op_r_name.name-PPCREC_NAME_FPR0); - } - else if( inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) - { - strOutput.addFmt("tempFpr{}", inst.op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.add(")"); - } - else if( inst.type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - strOutput.addFmt("name_{} (", inst.op_r_name.name); - if( inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0+999) ) - { - strOutput.addFmt("fpr{}", inst.op_r_name.name-PPCREC_NAME_FPR0); - } - else if( inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) - { - strOutput.addFmt("tempFpr{}", inst.op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.addFmt(") = fpr_t{}", inst.op_r_name.registerIndex); - } - else if( inst.type == PPCREC_IML_TYPE_FPR_LOAD ) - { - strOutput.addFmt("fpr_t{} = ", inst.op_storeLoad.registerData); - if( inst.op_storeLoad.flags2.signExtend ) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32, inst.op_storeLoad.mode); - if (inst.op_storeLoad.flags2.notExpanded) - { - strOutput.addFmt(" "); - } - } - else if( inst.type == PPCREC_IML_TYPE_FPR_STORE ) - { - if( inst.op_storeLoad.flags2.signExtend ) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}]", 
inst.op_storeLoad.copyWidth/8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - strOutput.addFmt("= fpr_t{} mode {}\n", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); - } - else if( inst.type == PPCREC_IML_TYPE_FPR_R_R ) - { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); - } - else if( inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R ) - { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); - } - else if( inst.type == PPCREC_IML_TYPE_FPR_R_R_R ) - { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); - } - else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress); - } - else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - strOutput.addFmt("t{} ", inst.op_conditional_r_s32.registerIndex); - bool displayAsHex = false; - if (inst.operation == PPCREC_IML_OP_ASSIGN) - { - displayAsHex = true; - strOutput.add("="); - } - else - strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); - if (displayAsHex) - strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); - else - strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); - strOutput.add(" (conditional)"); - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> and update CR{}", inst.crRegister); - } - } - else - { - strOutput.addFmt("Unknown iml type {}", inst.type); - } - debug_printf("%s", strOutput.c_str()); - if (printLivenessRangeInfo) - { - PPCRecompilerDebug_printLivenessRangeInfo(strOutput, imlSegment, i); - } - debug_printf("\n"); - } - // all ranges - if (printLivenessRangeInfo) - { - debug_printf("Ranges-VirtReg "); - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - debug_printf("v%-2d", subrangeItr->range->virtualRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - debug_printf("\n"); - debug_printf("Ranges-PhysReg "); - subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while (subrangeItr) - { - debug_printf("p%-2d", subrangeItr->range->physicalRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - debug_printf("\n"); - } - // branch info - debug_printf("Links from: "); - for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++) - { - if (i) - debug_printf(", "); - debug_printf("%p", (void*)imlSegment->list_prevSegments[i]); - } - debug_printf("\n"); - debug_printf("Links to: "); - if (imlSegment->nextSegmentBranchNotTaken) - debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken); - if (imlSegment->nextSegmentBranchTaken) - debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken); - debug_printf("\n"); -} - -void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) -{ - for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) - { - PPCRecompiler_dumpIMLSegment(ppcImlGenContext->segmentList2[i], i); - debug_printf("\n"); - 
} -} - void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, IMLSegment* imlSegment, sint32 index) { segmentPoint->imlSegment = imlSegment; @@ -3528,39 +3027,9 @@ void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2) { - //free(imlSegment->imlList); delete imlSegment; } ppcImlGenContext->segmentList2.clear(); - - //for(sint32 i=0; isegmentListCount; i++) - //{ - // free(ppcImlGenContext->segmentList[i]->imlList); - // delete ppcImlGenContext->segmentList[i]; - //} - //ppcImlGenContext->segmentListCount = 0; - //if (ppcImlGenContext->segmentList) - //{ - // free(ppcImlGenContext->segmentList); - // ppcImlGenContext->segmentList = nullptr; - //} -} - -bool PPCRecompiler_isSuffixInstruction(IMLInstruction* iml) -{ - if (iml->type == PPCREC_IML_TYPE_MACRO && (iml->operation == PPCREC_IML_MACRO_BLR || iml->operation == PPCREC_IML_MACRO_BCTR) || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_BL || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_B_FAR || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_BLRL || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_BCTRL || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_LEAVE || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_HLE || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_MFTB || - iml->type == PPCREC_IML_TYPE_PPC_ENTER || - iml->type == PPCREC_IML_TYPE_CJUMP || - iml->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - return true; - return false; } bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) @@ -4758,8 +4227,6 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); } finalSegment->imlList.clear(); - - //PPCRecompiler_dumpIML(ppcRecFunc, &ppcImlGenContext); } // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) @@ -4820,7 +4287,6 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // All segments are considered to be part of the same PPC instruction range // The first segment also retains the jump destination and enterable properties from the original segment. 
//debug_printf("--- Insert cycle counter check ---\n"); - //PPCRecompiler_dumpIML(ppcRecFunc, &ppcImlGenContext); PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); imlSegment = NULL; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp index 4ea28062b..9edbc6ff3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp @@ -942,26 +942,6 @@ bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContex } -bool PPCRecompiler_hasSuffixInstruction(IMLSegment* imlSegment) -{ - if (imlSegment->imlList.empty()) - return false; - const IMLInstruction& imlInstruction = imlSegment->imlList.back(); - if( imlInstruction.type == PPCREC_IML_TYPE_MACRO && (imlInstruction.operation == PPCREC_IML_MACRO_BLR || imlInstruction.operation == PPCREC_IML_MACRO_BCTR) || - imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_BL || - imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_B_FAR || - imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_BLRL || - imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_BCTRL || - imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_LEAVE || - imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_HLE || - imlInstruction.type == PPCREC_IML_TYPE_MACRO && imlInstruction.operation == PPCREC_IML_MACRO_MFTB || - imlInstruction.type == PPCREC_IML_TYPE_PPC_ENTER || - imlInstruction.type == PPCREC_IML_TYPE_CJUMP || - imlInstruction.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - return true; - return false; -} - void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, replacedRegisterTracker_t* replacedRegisterTracker, sint32 registerTrackerIndex, sint32* imlIndex) { // store register @@ -1203,7 +1183,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon while (idx < imlSegment->imlList.size()) { IMLInstruction& idxInst = imlSegment->imlList[idx]; - if ( PPCRecompiler_isSuffixInstruction(&idxInst) ) + if (idxInst.IsSuffixInstruction()) break; PPCRecompiler_checkRegisterUsage(ppcImlGenContext, &idxInst, ®istersUsed); sint32 fprMatch[4]; @@ -1704,11 +1684,8 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - if (PPCRecompiler_isSuffixInstruction(imlInstruction)) - { + if (imlInstruction->IsSuffixInstruction()) break; - } - // check if FPR is stored if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0) || (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0)) @@ -1795,10 +1772,8 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp for (; i < scanRangeEnd; i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - if (PPCRecompiler_isSuffixInstruction(imlInstruction)) - { + if (imlInstruction->IsSuffixInstruction()) break; - } // check if GPR is stored if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && 
imlInstruction->op_storeLoad.copyWidth == 32 ) ) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp index 3d4546c1f..afe6d943c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp @@ -5,8 +5,6 @@ void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); -bool PPCRecompiler_isSuffixInstruction(IMLInstruction* iml); - uint32 recRACurrentIterationIndex = 0; uint32 PPCRecRA_getNextIterationIndex() @@ -759,7 +757,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, raLiveRangeInfo_t liveInfo; liveInfo.liveRangesCount = 0; sint32 index = 0; - sint32 suffixInstructionCount = (imlSegment->imlList.size() > 0 && PPCRecompiler_isSuffixInstruction(imlSegment->imlList.data() + imlSegment->imlList.size() - 1)) ? 1 : 0; + sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; //for (auto& subrange : imlSegment->raInfo.list_subranges) @@ -1020,7 +1018,7 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, while (index < imlSegment->imlList.size()) { // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList.data() + index)) + if (imlSegment->imlList[index].IsSuffixInstruction()) break; // get accessed GPRs PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList.data() + index, &gprTracking); @@ -1113,7 +1111,7 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, while (index < imlSegment->imlList.size()) { // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList.data() + index)) + if (imlSegment->imlList[index].IsSuffixInstruction()) break; // get accessed GPRs PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList.data() + index, &gprTracking); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp index 21edc8106..6a3dd39d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp @@ -2266,8 +2266,6 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size) return codeMem; } -void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); - bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { x64GenContext_t x64GenContext = {0}; From 101a2ef911a0aa3ab688ffc39f61da0993cac6ac Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 5 Nov 2022 08:27:30 +0100 Subject: [PATCH 07/64] PPCRec: Move analyzer file + move some funcs to IMLInstruction --- src/Cafe/CMakeLists.txt | 2 +- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 15 + .../IMLAnalyzer.cpp} | 19 +- .../Recompiler/IML/IMLInstruction.cpp | 846 ++++++++++++++++ .../Espresso/Recompiler/IML/IMLInstruction.h | 34 + .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 42 - .../Recompiler/PPCRecompilerImlGen.cpp | 9 +- .../Recompiler/PPCRecompilerImlOptimizer.cpp | 924 +----------------- .../PPCRecompilerImlRegisterAllocator.cpp | 18 +- 9 
files changed, 961 insertions(+), 948 deletions(-) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerImlAnalyzer.cpp => IML/IMLAnalyzer.cpp} (89%) diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 91badaf53..bbe28a9a1 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -73,7 +73,7 @@ add_library(CemuCafe HW/Espresso/Recompiler/IML/IMLInstruction.cpp HW/Espresso/Recompiler/IML/IMLInstruction.h HW/Espresso/Recompiler/IML/IMLDebug.cpp - HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp + HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp HW/Espresso/Recompiler/PPCRecompilerIml.h diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 6f1030870..6619e75a6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -1,3 +1,18 @@ +#pragma once + +#include "IMLInstruction.h" +#include "IMLSegment.h" + +// analyzer +struct PPCRecCRTracking_t +{ + uint32 readCRBits; + uint32 writtenCRBits; +}; + +bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment); +bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction); +void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking); // debug void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp similarity index 89% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp rename to src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index 8c976e8f2..160554d6f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -1,12 +1,13 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" +#include "IML.h" +//#include "PPCRecompilerIml.h" #include "util/helpers/fixedSizeList.h" + #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" /* * Initializes a single segment and returns true if it is a finite loop */ -bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(IMLSegment* imlSegment) +bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) { bool isTightFiniteLoop = false; // base criteria, must jump to beginning of same segment @@ -35,12 +36,12 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(IMLSegment* imlSegment) { // remove all registers from the list that are modified by non-ADD/SUB instructions // todo: We should also cover the case where ADD+SUB on the same register cancel the effect out - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; for (const IMLInstruction& instIt : imlSegment->imlList) { if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB)) continue; - PPCRecompiler_checkRegisterUsage(nullptr, &instIt, ®istersUsed); + instIt.CheckRegisterUsage(®istersUsed); if(registersUsed.writtenNamedReg1 < 0) continue; list_modifiedRegisters.remove(registersUsed.writtenNamedReg1); @@ -56,7 +57,7 @@ bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(IMLSegment* imlSegment) /* * Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister) */ -bool PPCRecompilerImlAnalyzer_canTypeWriteCR(IMLInstruction* imlInstruction) +bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* 
imlInstruction) { if (imlInstruction->type == PPCREC_IML_TYPE_R_R) return true; @@ -77,7 +78,7 @@ bool PPCRecompilerImlAnalyzer_canTypeWriteCR(IMLInstruction* imlInstruction) return false; } -void PPCRecompilerImlAnalyzer_getCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking) +void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking) { crTracking->readCRBits = 0; crTracking->writtenCRBits = 0; @@ -125,7 +126,7 @@ void PPCRecompilerImlAnalyzer_getCRTracking(IMLInstruction* imlInstruction, PPCR else assert_dbg(); } - else if (PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) + else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) { crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); } @@ -134,4 +135,4 @@ void PPCRecompilerImlAnalyzer_getCRTracking(IMLInstruction* imlInstruction, PPCR // overwrites CR0 crTracking->writtenCRBits |= (0xF << 0); } -} +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index e69de29bb..1a0d8c559 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -0,0 +1,846 @@ +#include "IMLInstruction.h" +#include "IML.h" + +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" + +void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const +{ + registersUsed->readNamedReg1 = -1; + registersUsed->readNamedReg2 = -1; + registersUsed->readNamedReg3 = -1; + registersUsed->writtenNamedReg1 = -1; + registersUsed->readFPR1 = -1; + registersUsed->readFPR2 = -1; + registersUsed->readFPR3 = -1; + registersUsed->readFPR4 = -1; + registersUsed->writtenFPR1 = -1; + if (type == PPCREC_IML_TYPE_R_NAME) + { + registersUsed->writtenNamedReg1 = op_r_name.registerIndex; + } + else if (type == PPCREC_IML_TYPE_NAME_R) + { + registersUsed->readNamedReg1 = op_r_name.registerIndex; + } + else if (type == PPCREC_IML_TYPE_R_R) + { + if (operation == PPCREC_IML_OP_COMPARE_SIGNED || operation == PPCREC_IML_OP_COMPARE_UNSIGNED || operation == PPCREC_IML_OP_DCBZ) + { + // both operands are read only + registersUsed->readNamedReg1 = op_r_r.registerResult; + registersUsed->readNamedReg2 = op_r_r.registerA; + } + else if ( + operation == PPCREC_IML_OP_OR || + operation == PPCREC_IML_OP_AND || + operation == PPCREC_IML_OP_XOR || + operation == PPCREC_IML_OP_ADD || + operation == PPCREC_IML_OP_ADD_CARRY || + operation == PPCREC_IML_OP_ADD_CARRY_ME || + operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY) + { + // result is read and written, operand is read + registersUsed->writtenNamedReg1 = op_r_r.registerResult; + registersUsed->readNamedReg1 = op_r_r.registerResult; + registersUsed->readNamedReg2 = op_r_r.registerA; + } + else if ( + operation == PPCREC_IML_OP_ASSIGN || + operation == PPCREC_IML_OP_ENDIAN_SWAP || + operation == PPCREC_IML_OP_CNTLZW || + operation == PPCREC_IML_OP_NOT || + operation == PPCREC_IML_OP_NEG || + operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 || + operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32) + { + // result is written, operand is read + registersUsed->writtenNamedReg1 = op_r_r.registerResult; + registersUsed->readNamedReg1 = op_r_r.registerA; + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_R_S32) + { + if (operation == 
PPCREC_IML_OP_COMPARE_SIGNED || operation == PPCREC_IML_OP_COMPARE_UNSIGNED || operation == PPCREC_IML_OP_MTCRF) + { + // operand register is read only + registersUsed->readNamedReg1 = op_r_immS32.registerIndex; + } + else if (operation == PPCREC_IML_OP_ADD || + operation == PPCREC_IML_OP_SUB || + operation == PPCREC_IML_OP_AND || + operation == PPCREC_IML_OP_OR || + operation == PPCREC_IML_OP_XOR || + operation == PPCREC_IML_OP_LEFT_ROTATE) + { + // operand register is read and write + registersUsed->readNamedReg1 = op_r_immS32.registerIndex; + registersUsed->writtenNamedReg1 = op_r_immS32.registerIndex; + } + else + { + // operand register is write only + // todo - use explicit lists, avoid default cases + registersUsed->writtenNamedReg1 = op_r_immS32.registerIndex; + } + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + { + if (operation == PPCREC_IML_OP_ASSIGN) + { + // result is written, but also considered read (in case the condition fails) + registersUsed->readNamedReg1 = op_conditional_r_s32.registerIndex; + registersUsed->writtenNamedReg1 = op_conditional_r_s32.registerIndex; + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_R_R_S32) + { + if (operation == PPCREC_IML_OP_RLWIMI) + { + // result and operand register are both read, result is written + registersUsed->writtenNamedReg1 = op_r_r_s32.registerResult; + registersUsed->readNamedReg1 = op_r_r_s32.registerResult; + registersUsed->readNamedReg2 = op_r_r_s32.registerA; + } + else + { + // result is write only and operand is read only + registersUsed->writtenNamedReg1 = op_r_r_s32.registerResult; + registersUsed->readNamedReg1 = op_r_r_s32.registerA; + } + } + else if (type == PPCREC_IML_TYPE_R_R_R) + { + // in all cases result is written and other operands are read only + registersUsed->writtenNamedReg1 = op_r_r_r.registerResult; + registersUsed->readNamedReg1 = op_r_r_r.registerA; + registersUsed->readNamedReg2 = op_r_r_r.registerB; + } + else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_NO_OP) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_MACRO) + { + if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_BLR || operation == PPCREC_IML_MACRO_BLRL || operation == PPCREC_IML_MACRO_BCTR || operation == PPCREC_IML_MACRO_BCTRL || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB) + { + // no effect on registers + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_LOAD) + { + registersUsed->writtenNamedReg1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg1 = op_storeLoad.registerMem; + } + else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) + { + registersUsed->writtenNamedReg1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg1 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg2 = op_storeLoad.registerMem2; + } + else if (type == PPCREC_IML_TYPE_STORE) + { + registersUsed->readNamedReg1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg2 = op_storeLoad.registerMem; + } + 
else if (type == PPCREC_IML_TYPE_STORE_INDEXED) + { + registersUsed->readNamedReg1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg2 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg3 = op_storeLoad.registerMem2; + } + else if (type == PPCREC_IML_TYPE_CR) + { + // only affects cr register + } + else if (type == PPCREC_IML_TYPE_JUMPMARK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_PPC_ENTER) + { + // no op + } + else if (type == PPCREC_IML_TYPE_FPR_R_NAME) + { + // fpr operation + registersUsed->writtenFPR1 = op_r_name.registerIndex; + } + else if (type == PPCREC_IML_TYPE_FPR_NAME_R) + { + // fpr operation + registersUsed->readFPR1 = op_r_name.registerIndex; + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD) + { + // fpr load operation + registersUsed->writtenFPR1 = op_storeLoad.registerData; + // address is in gpr register + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg1 = op_storeLoad.registerMem; + // determine partially written result + switch (op_storeLoad.mode) + { + case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: + case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: + cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + registersUsed->readNamedReg2 = op_storeLoad.registerGQR; + break; + case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: + // PS1 remains the same + registersUsed->readFPR4 = op_storeLoad.registerData; + break; + case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: + case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: + case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: + case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: + case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: + break; + default: + cemu_assert_unimplemented(); + } + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + { + // fpr load operation + registersUsed->writtenFPR1 = op_storeLoad.registerData; + // address is in gpr registers + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg1 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg2 = op_storeLoad.registerMem2; + // determine partially written result + switch (op_storeLoad.mode) + { + case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: + case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: + cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + registersUsed->readNamedReg3 = op_storeLoad.registerGQR; + break; + case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: + // PS1 remains the same + registersUsed->readFPR4 = op_storeLoad.registerData; + break; + case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: + case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: + case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: + case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: + case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: + break; + default: + cemu_assert_unimplemented(); + } + } + else if (type == PPCREC_IML_TYPE_FPR_STORE) + { + // fpr store operation + registersUsed->readFPR1 = op_storeLoad.registerData; + if 
(op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg1 = op_storeLoad.registerMem; + // PSQ generic stores also access GQR + switch (op_storeLoad.mode) + { + case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: + case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: + cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + registersUsed->readNamedReg2 = op_storeLoad.registerGQR; + break; + default: + break; + } + } + else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + { + // fpr store operation + registersUsed->readFPR1 = op_storeLoad.registerData; + // address is in gpr registers + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg1 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + registersUsed->readNamedReg2 = op_storeLoad.registerMem2; + // PSQ generic stores also access GQR + switch (op_storeLoad.mode) + { + case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: + case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: + cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + registersUsed->readNamedReg3 = op_storeLoad.registerGQR; + break; + default: + break; + } + } + else if (type == PPCREC_IML_TYPE_FPR_R_R) + { + // fpr operation + if (operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP || + operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP || + operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED || + operation == PPCREC_IML_OP_ASSIGN || + operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP || + operation == PPCREC_IML_OP_FPR_NEGATE_PAIR || + operation == PPCREC_IML_OP_FPR_ABS_PAIR || + operation == PPCREC_IML_OP_FPR_FRES_PAIR || + operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) + { + // operand read, result written + registersUsed->readFPR1 = op_fpr_r_r.registerOperand; + registersUsed->writtenFPR1 = op_fpr_r_r.registerResult; + } + else if ( + operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM || + operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP || + operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP || + operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM || + operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || + operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ || + operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT + ) + { + // operand read, result read and (partially) written + registersUsed->readFPR1 = op_fpr_r_r.registerOperand; + registersUsed->readFPR4 = op_fpr_r_r.registerResult; + registersUsed->writtenFPR1 = op_fpr_r_r.registerResult; + } + else if (operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM || + operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR || + operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM || + operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR || + operation == PPCREC_IML_OP_FPR_ADD_BOTTOM || + operation == PPCREC_IML_OP_FPR_ADD_PAIR || + operation == PPCREC_IML_OP_FPR_SUB_PAIR || + operation == PPCREC_IML_OP_FPR_SUB_BOTTOM) + { + // operand read, result read and written + registersUsed->readFPR1 = op_fpr_r_r.registerOperand; + registersUsed->readFPR2 = op_fpr_r_r.registerResult; + registersUsed->writtenFPR1 = op_fpr_r_r.registerResult; + + } + else if (operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || + operation == PPCREC_IML_OP_FPR_FCMPU_TOP || + operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM) + { + // operand read, result read + registersUsed->readFPR1 = op_fpr_r_r.registerOperand; + registersUsed->readFPR2 = op_fpr_r_r.registerResult; + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R) 
+ { + // fpr operation + registersUsed->readFPR1 = op_fpr_r_r_r.registerOperandA; + registersUsed->readFPR2 = op_fpr_r_r_r.registerOperandB; + registersUsed->writtenFPR1 = op_fpr_r_r_r.registerResult; + // handle partially written result + switch (operation) + { + case PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM: + case PPCREC_IML_OP_FPR_ADD_BOTTOM: + case PPCREC_IML_OP_FPR_SUB_BOTTOM: + registersUsed->readFPR4 = op_fpr_r_r_r.registerResult; + break; + case PPCREC_IML_OP_FPR_SUB_PAIR: + break; + default: + cemu_assert_unimplemented(); + } + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + // fpr operation + registersUsed->readFPR1 = op_fpr_r_r_r_r.registerOperandA; + registersUsed->readFPR2 = op_fpr_r_r_r_r.registerOperandB; + registersUsed->readFPR3 = op_fpr_r_r_r_r.registerOperandC; + registersUsed->writtenFPR1 = op_fpr_r_r_r_r.registerResult; + // handle partially written result + switch (operation) + { + case PPCREC_IML_OP_FPR_SELECT_BOTTOM: + registersUsed->readFPR4 = op_fpr_r_r_r_r.registerResult; + break; + case PPCREC_IML_OP_FPR_SUM0: + case PPCREC_IML_OP_FPR_SUM1: + case PPCREC_IML_OP_FPR_SELECT_PAIR: + break; + default: + cemu_assert_unimplemented(); + } + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + // fpr operation + if (operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM || + operation == PPCREC_IML_OP_FPR_ABS_BOTTOM || + operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM || + operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || + operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM || + operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR) + { + registersUsed->readFPR1 = op_fpr_r.registerResult; + registersUsed->writtenFPR1 = op_fpr_r.registerResult; + } + else + cemu_assert_unimplemented(); + } + else + { + cemu_assert_unimplemented(); + } +} + +#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) + +sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) +{ + for (sint32 i = 0; i < 4; i++) + { + if (match[i] < 0) + continue; + if (reg == match[i]) + { + return replaced[i]; + } + } + return reg; +} + +void IMLInstruction::ReplaceGPRRegisterUsageMultiple(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) +{ + if (type == PPCREC_IML_TYPE_R_NAME) + { + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_NAME_R) + { + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_R_R) + { + op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); + op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_R_S32) + { + op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + { + op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_R_R_S32) + { + // in all cases result is written and other operand is read only + op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); + op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, 
gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_R_R_R) + { + // in all cases result is written and other operands are read only + op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); + op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); + op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_NO_OP) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_MACRO) + { + if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_BLR || operation == PPCREC_IML_MACRO_BLRL || operation == PPCREC_IML_MACRO_BCTR || operation == PPCREC_IML_MACRO_BCTRL || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB || operation == PPCREC_IML_MACRO_COUNT_CYCLES) + { + // no effect on registers + } + else + { + cemu_assert_unimplemented(); + } + } + else if (type == PPCREC_IML_TYPE_LOAD) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + } + } + else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_STORE) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_STORE_INDEXED) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_CR) + { + // only affects cr register + } + else if (type == PPCREC_IML_TYPE_JUMPMARK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_PPC_ENTER) + { + // no op + } + else if (type == PPCREC_IML_TYPE_FPR_R_NAME) + { + + } + else if (type == PPCREC_IML_TYPE_FPR_NAME_R) + { + + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD) + { + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + { + 
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + } + if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + } + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + { + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + } + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + } + if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + } + } + else if (type == PPCREC_IML_TYPE_FPR_STORE) + { + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + } + if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + } + } + else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + { + if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + } + if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + } + if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + { + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + } + } + else if (type == PPCREC_IML_TYPE_FPR_R_R) + { + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R) + { + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + } + else + { + cemu_assert_unimplemented(); + } +} + +void IMLInstruction::ReplaceFPRRegisterUsageMultiple(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) +{ + if (type == PPCREC_IML_TYPE_R_NAME) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_NAME_R) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_R) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_S32) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_R_S32) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_R_R) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_NO_OP) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_MACRO) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_LOAD) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_STORE) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_STORE_INDEXED) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_CR) + { + // only affects cr register + } + else if (type == PPCREC_IML_TYPE_JUMPMARK) + { + // no effect on registers + } + else if (type == 
PPCREC_IML_TYPE_PPC_ENTER) + { + // no op + } + else if (type == PPCREC_IML_TYPE_FPR_R_NAME) + { + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_NAME_R) + { + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_STORE) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + { + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R) + { + op_fpr_r_r.registerResult = replaceRegisterMultiple(op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.registerOperand = replaceRegisterMultiple(op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R) + { + op_fpr_r_r_r.registerResult = replaceRegisterMultiple(op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.registerOperandA = replaceRegisterMultiple(op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.registerOperandB = replaceRegisterMultiple(op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + op_fpr_r_r_r_r.registerResult = replaceRegisterMultiple(op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.registerOperandA = replaceRegisterMultiple(op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.registerOperandB = replaceRegisterMultiple(op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.registerOperandC = replaceRegisterMultiple(op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + op_fpr_r.registerResult = replaceRegisterMultiple(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + } + else + { + cemu_assert_unimplemented(); + } +} + +void IMLInstruction::ReplaceFPRRegisterUsage(sint32 fprRegisterSearched, sint32 fprRegisterReplaced) +{ + if (type == PPCREC_IML_TYPE_R_NAME) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_NAME_R) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_R) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_S32) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_R_S32) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_R_R_R) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_NO_OP) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_MACRO) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_LOAD) + { + // not affected + } + else if (type == 
PPCREC_IML_TYPE_LOAD_INDEXED) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_STORE) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_STORE_INDEXED) + { + // not affected + } + else if (type == PPCREC_IML_TYPE_CR) + { + // only affects cr register + } + else if (type == PPCREC_IML_TYPE_JUMPMARK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_PPC_ENTER) + { + // no op + } + else if (type == PPCREC_IML_TYPE_FPR_R_NAME) + { + op_r_name.registerIndex = replaceRegister(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_NAME_R) + { + op_r_name.registerIndex = replaceRegister(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD) + { + op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + { + op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_STORE) + { + op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + { + op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R) + { + op_fpr_r_r.registerResult = replaceRegister(op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.registerOperand = replaceRegister(op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R) + { + op_fpr_r_r_r.registerResult = replaceRegister(op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.registerOperandA = replaceRegister(op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.registerOperandB = replaceRegister(op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + op_fpr_r_r_r_r.registerResult = replaceRegister(op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.registerOperandA = replaceRegister(op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.registerOperandB = replaceRegister(op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.registerOperandC = replaceRegister(op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + op_fpr_r.registerResult = replaceRegister(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + } + else + { + cemu_assert_unimplemented(); + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 34733c4f9..b72e7a67a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -236,6 +236,35 @@ enum PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, }; +struct IMLUsedRegisters +{ + union + { + struct + { + sint16 readNamedReg1; + sint16 readNamedReg2; + sint16 readNamedReg3; + sint16 writtenNamedReg1; + }; + sint16 gpr[4]; // 3 read + 1 write + }; + // FPR + union + { + struct + { + // note: If destination operand is not fully 
written (PS0 and PS1) it will be added to the read registers + sint16 readFPR1; + sint16 readFPR2; + sint16 readFPR3; + sint16 readFPR4; + sint16 writtenFPR1; + }; + sint16 fpr[4]; + }; +}; + struct IMLInstruction { uint8 type; @@ -409,4 +438,9 @@ struct IMLInstruction associatedPPCAddress = 0; } + void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; + + void ReplaceGPRRegisterUsageMultiple(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); + void ReplaceFPRRegisterUsageMultiple(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]); + void ReplaceFPRRegisterUsage(sint32 fprRegisterSearched, sint32 fprRegisterReplaced); }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 7ee5dffc0..1af1ee087 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -122,17 +122,6 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont IMLInstruction* PPCRecompilerIML_getLastInstruction(IMLSegment* imlSegment); -// IML analyzer -typedef struct -{ - uint32 readCRBits; - uint32 writtenCRBits; -}PPCRecCRTracking_t; - -bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(IMLSegment* imlSegment); -bool PPCRecompilerImlAnalyzer_canTypeWriteCR(IMLInstruction* imlInstruction); -void PPCRecompilerImlAnalyzer_getCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking); - // IML optimizer bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); @@ -149,34 +138,3 @@ void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext); // late optimizations void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext); - -typedef struct -{ - union - { - struct - { - sint16 readNamedReg1; - sint16 readNamedReg2; - sint16 readNamedReg3; - sint16 writtenNamedReg1; - }; - sint16 gpr[4]; // 3 read + 1 write - }; - // FPR - union - { - struct - { - // note: If destination operand is not fully written, it will be added as a read FPR as well - sint16 readFPR1; - sint16 readFPR2; - sint16 readFPR3; - sint16 readFPR4; // usually this is set to the result FPR if only partially overwritten - sint16 writtenFPR1; - }; - sint16 fpr[4]; - }; -}PPCImlOptimizerUsedRegisters_t; - -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const IMLInstruction* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 7fdbff171..4e91bbe63 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2,8 +2,8 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" #include "PPCRecompilerImlRanges.h" +#include "IML/IML.h" bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); @@ -4277,7 +4277,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) continue; // exclude non-infinite tight loops - if 
(PPCRecompilerImlAnalyzer_isTightFiniteLoop(imlSegment)) + if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) continue; // potential loop segment found, split this segment into four: // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) @@ -4376,11 +4376,10 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if(segIt->imlList.size() == 0 ) continue; // ignore empty segments // analyze segment for register usage - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; for(sint32 i=0; i<segIt->imlList.size(); i++) { - PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, segIt->imlList.data() + i, &registersUsed); - //PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, registersUsed.readGPR1); + segIt->imlList[i].CheckRegisterUsage(&registersUsed); sint32 accessedTempReg[5]; // intermediate FPRs accessedTempReg[0] = registersUsed.readFPR1; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp index 9edbc6ff3..4a7115c01 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp @@ -1,849 +1,10 @@ #include "../Interpreter/PPCInterpreterInternal.h" +#include "Cafe/HW/Espresso/Recompiler/IML/IML.h" +#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "PPCRecompilerX64.h" -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, const IMLInstruction* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed) -{ - registersUsed->readNamedReg1 = -1; - registersUsed->readNamedReg2 = -1; - registersUsed->readNamedReg3 = -1; - registersUsed->writtenNamedReg1 = -1; - registersUsed->readFPR1 = -1; - registersUsed->readFPR2 = -1; - registersUsed->readFPR3 = -1; - registersUsed->readFPR4 = -1; - registersUsed->writtenFPR1 = -1; - if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) - { - registersUsed->writtenNamedReg1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R ) - { - registersUsed->readNamedReg1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) - { - if (imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED || imlInstruction->operation == PPCREC_IML_OP_DCBZ) - { - // both operands are read only - registersUsed->readNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg2 = imlInstruction->op_r_r.registerA; - } - else if ( - imlInstruction->operation == PPCREC_IML_OP_OR || - imlInstruction->operation == PPCREC_IML_OP_AND || - imlInstruction->operation == PPCREC_IML_OP_XOR || - imlInstruction->operation == PPCREC_IML_OP_ADD || - imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY || - imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_ME || - imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY) - { - // result is read and written, operand is read - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg2 = imlInstruction->op_r_r.registerA; - } - else if ( - imlInstruction->operation == PPCREC_IML_OP_ASSIGN || - imlInstruction->operation == 
PPCREC_IML_OP_ENDIAN_SWAP || - imlInstruction->operation == PPCREC_IML_OP_CNTLZW || - imlInstruction->operation == PPCREC_IML_OP_NOT || - imlInstruction->operation == PPCREC_IML_OP_NEG || - imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 || - imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32) - { - // result is written, operand is read - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r.registerA; - } - else - cemu_assert_unimplemented(); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - { - if (imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED || imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - // operand register is read only - registersUsed->readNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - } - else if (imlInstruction->operation == PPCREC_IML_OP_ADD || - imlInstruction->operation == PPCREC_IML_OP_SUB || - imlInstruction->operation == PPCREC_IML_OP_AND || - imlInstruction->operation == PPCREC_IML_OP_OR || - imlInstruction->operation == PPCREC_IML_OP_XOR || - imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE) - { - // operand register is read and write - registersUsed->readNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - registersUsed->writtenNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - } - else - { - // operand register is write only - // todo - use explicit lists, avoid default cases - registersUsed->writtenNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - { - // result is written, but also considered read (in case the condition fails) - registersUsed->readNamedReg1 = imlInstruction->op_conditional_r_s32.registerIndex; - registersUsed->writtenNamedReg1 = imlInstruction->op_conditional_r_s32.registerIndex; - } - else - cemu_assert_unimplemented(); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 ) - { - if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) - { - // result and operand register are both read, result is written - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r_s32.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r_s32.registerResult; - registersUsed->readNamedReg2 = imlInstruction->op_r_r_s32.registerA; - } - else - { - // result is write only and operand is read only - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r_s32.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r_s32.registerA; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R ) - { - // in all cases result is written and other operands are read only - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r_r.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r_r.registerA; - registersUsed->readNamedReg2 = imlInstruction->op_r_r_r.registerB; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - if( imlInstruction->operation == PPCREC_IML_MACRO_BL || imlInstruction->operation == PPCREC_IML_MACRO_B_FAR || imlInstruction->operation == PPCREC_IML_MACRO_BLR || 
imlInstruction->operation == PPCREC_IML_MACRO_BLRL || imlInstruction->operation == PPCREC_IML_MACRO_BCTR || imlInstruction->operation == PPCREC_IML_MACRO_BCTRL || imlInstruction->operation == PPCREC_IML_MACRO_LEAVE || imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK || imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES || imlInstruction->operation == PPCREC_IML_MACRO_HLE || imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) - { - // no effect on registers - } - else - cemu_assert_unimplemented(); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD) - { - registersUsed->writtenNamedReg1 = imlInstruction->op_storeLoad.registerData; - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) - { - registersUsed->writtenNamedReg1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - if( imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem2; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE ) - { - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED ) - { - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem; - if( imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg3 = imlInstruction->op_storeLoad.registerMem2; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) - { - // only affects cr register - } - else if( imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // no op - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - // fpr operation - registersUsed->writtenFPR1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - // fpr operation - registersUsed->readFPR1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) - { - // fpr load operation - registersUsed->writtenFPR1 = imlInstruction->op_storeLoad.registerData; - // address is in gpr register - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - // determine partially written result - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerGQR; - break; - case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: - // PS1 remains the same - registersUsed->readFPR4 = imlInstruction->op_storeLoad.registerData; - break; - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case 
PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: - break; - default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED ) - { - // fpr load operation - registersUsed->writtenFPR1 = imlInstruction->op_storeLoad.registerData; - // address is in gpr registers - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem2; - // determine partially written result - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg3 = imlInstruction->op_storeLoad.registerGQR; - break; - case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: - // PS1 remains the same - registersUsed->readFPR4 = imlInstruction->op_storeLoad.registerData; - break; - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - break; - default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE ) - { - // fpr store operation - registersUsed->readFPR1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - // PSQ generic stores also access GQR - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerGQR; - break; - default: - break; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED ) - { - // fpr store operation - registersUsed->readFPR1 = imlInstruction->op_storeLoad.registerData; - // address is in gpr registers - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - if( imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem2; - // PSQ generic stores also access GQR - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg3 = imlInstruction->op_storeLoad.registerGQR; - break; - default: - break; - } - } - else if( 
imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R ) - { - // fpr operation - if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED || - imlInstruction->operation == PPCREC_IML_OP_ASSIGN || - imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR ) - { - // operand read, result written - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r.registerResult; - } - else if( - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ || - imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT - ) - { - // operand read, result read and (partially) written - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->readFPR4 = imlInstruction->op_fpr_r_r.registerResult; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r.registerResult; - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) - { - // operand read, result read and written - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r.registerResult; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r.registerResult; - - } - else if(imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM) - { - // operand read, result read - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r.registerResult; - } - else - cemu_assert_unimplemented(); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R ) - { - // fpr operation - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r_r.registerOperandA; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r_r.registerOperandB; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r_r.registerResult; - // handle partially written result - switch (imlInstruction->operation) - { - case PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM: - case PPCREC_IML_OP_FPR_ADD_BOTTOM: - case PPCREC_IML_OP_FPR_SUB_BOTTOM: - registersUsed->readFPR4 = imlInstruction->op_fpr_r_r_r.registerResult; - break; - case PPCREC_IML_OP_FPR_SUB_PAIR: - break; - 
default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R ) - { - // fpr operation - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r_r_r.registerOperandA; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r_r_r.registerOperandB; - registersUsed->readFPR3 = imlInstruction->op_fpr_r_r_r_r.registerOperandC; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r_r_r.registerResult; - // handle partially written result - switch (imlInstruction->operation) - { - case PPCREC_IML_OP_FPR_SELECT_BOTTOM: - registersUsed->readFPR4 = imlInstruction->op_fpr_r_r_r_r.registerResult; - break; - case PPCREC_IML_OP_FPR_SUM0: - case PPCREC_IML_OP_FPR_SUM1: - case PPCREC_IML_OP_FPR_SELECT_PAIR: - break; - default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R ) - { - // fpr operation - if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) - { - registersUsed->readFPR1 = imlInstruction->op_fpr_r.registerResult; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r.registerResult; - } - else - cemu_assert_unimplemented(); - } - else - { - cemu_assert_unimplemented(); - } -} - -#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) - -sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) -{ - for (sint32 i = 0; i < 4; i++) - { - if(match[i] < 0) - continue; - if (reg == match[i]) - { - return replaced[i]; - } - } - return reg; -} - -void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) -{ - if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NAME_R) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R) - { - imlInstruction->op_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r.registerA = replaceRegisterMultiple(imlInstruction->op_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - { - imlInstruction->op_r_immS32.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_immS32.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - imlInstruction->op_conditional_r_s32.registerIndex = replaceRegisterMultiple(imlInstruction->op_conditional_r_s32.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) - { - // in all cases result is written and other operand is read only - imlInstruction->op_r_r_s32.registerResult = 
replaceRegisterMultiple(imlInstruction->op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r_s32.registerA = replaceRegisterMultiple(imlInstruction->op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) - { - // in all cases result is written and other operands are read only - imlInstruction->op_r_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r_r.registerA = replaceRegisterMultiple(imlInstruction->op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r_r.registerB = replaceRegisterMultiple(imlInstruction->op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NO_OP) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) - { - if (imlInstruction->operation == PPCREC_IML_MACRO_BL || imlInstruction->operation == PPCREC_IML_MACRO_B_FAR || imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BLRL || imlInstruction->operation == PPCREC_IML_MACRO_BCTR || imlInstruction->operation == PPCREC_IML_MACRO_BCTRL || imlInstruction->operation == PPCREC_IML_MACRO_LEAVE || imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK || imlInstruction->operation == PPCREC_IML_MACRO_HLE || imlInstruction->operation == PPCREC_IML_MACRO_MFTB || imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES ) - { - // no effect on registers - } - else - { - cemu_assert_unimplemented(); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) - { - 
imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - // only affects cr register - } - else if (imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) - { - - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R) - { - - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - 
{ - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R) - { - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) - { - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) - { - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) - { - } - else - { - cemu_assert_unimplemented(); - } -} - -void PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) -{ - if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NAME_R) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NO_OP) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - // only affects cr register - } - else if (imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == 
PPCREC_IML_TYPE_FPR_R_R) - { - imlInstruction->op_fpr_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r.registerOperand = replaceRegisterMultiple(imlInstruction->op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) - { - imlInstruction->op_fpr_r_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandA = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandB = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) - { - imlInstruction->op_fpr_r_r_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandA = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandB = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandC = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) - { - imlInstruction->op_fpr_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - } - else - { - cemu_assert_unimplemented(); - } -} - -void PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 fprRegisterSearched, sint32 fprRegisterReplaced) -{ - if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_S32 ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) - { - // only affects cr register - } - else if( imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // no op 
- } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - imlInstruction->op_r_name.registerIndex = replaceRegister(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - imlInstruction->op_r_name.registerIndex = replaceRegister(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R ) - { - imlInstruction->op_fpr_r_r.registerResult = replaceRegister(imlInstruction->op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r.registerOperand = replaceRegister(imlInstruction->op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R ) - { - imlInstruction->op_fpr_r_r_r.registerResult = replaceRegister(imlInstruction->op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandA = replaceRegister(imlInstruction->op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandB = replaceRegister(imlInstruction->op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R ) - { - imlInstruction->op_fpr_r_r_r_r.registerResult = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandA = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandB = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandC = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R ) - { - imlInstruction->op_fpr_r.registerResult = replaceRegister(imlInstruction->op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - } - else - { - cemu_assert_unimplemented(); - } -} - typedef struct { struct @@ -858,7 +19,7 @@ typedef struct sint32 count; }replacedRegisterTracker_t; -bool PPCRecompiler_checkIfGPRRegisterIsAccessed(PPCImlOptimizerUsedRegisters_t* registersUsed, sint32 gprRegister) +bool PPCRecompiler_checkIfGPRRegisterIsAccessed(IMLUsedRegisters* registersUsed, sint32 gprRegister) { if( registersUsed->readNamedReg1 == 
gprRegister ) return true; @@ -875,7 +36,7 @@ bool PPCRecompiler_checkIfGPRRegisterIsAccessed(PPCImlOptimizerUsedRegisters_t* * Returns index of register to replace * If no register needs to be replaced, -1 is returned */ -sint32 PPCRecompiler_getNextRegisterToReplace(PPCImlOptimizerUsedRegisters_t* registersUsed) +sint32 PPCRecompiler_getNextRegisterToReplace(IMLUsedRegisters* registersUsed) { // get index of register to replace sint32 gprToReplace = -1; @@ -893,10 +54,10 @@ sint32 PPCRecompiler_getNextRegisterToReplace(PPCImlOptimizerUsedRegisters_t* re bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed) { - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, &imlSegment->imlList[imlIndexStart], &registersUsed); + IMLUsedRegisters registersUsed; + imlSegment->imlList[imlIndexStart].CheckRegisterUsage(&registersUsed); // mask all registers used by this instruction - uint32 instructionReservedRegisterMask = 0;//(1<<(PPC_X64_GPR_USABLE_REGISTERS+1))-1; + uint32 instructionReservedRegisterMask = 0; if( registersUsed.readNamedReg1 != -1 ) instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg1)); if( registersUsed.readNamedReg2 != -1 ) @@ -1006,10 +167,10 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte size_t imlIndex = 0; while( imlIndex < segIt->imlList.size() ) { - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; while( true ) { - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, segIt->imlList.data()+imlIndex, &registersUsed); + segIt->imlList[imlIndex].CheckRegisterUsage(&registersUsed); if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) { // get index of register to replace @@ -1055,7 +216,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; } // replace registers that are out of range - PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, segIt->imlList.data() + imlIndex, fprToReplace, unusedRegisterIndex); + segIt->imlList[imlIndex].ReplaceFPRRegisterUsage(fprToReplace, unusedRegisterIndex); // add load/store name after instruction PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex+1, 2); // add load/store before current instruction @@ -1121,7 +282,7 @@ typedef struct sint32 currentUseIndex; }ppcRecManageRegisters_t; -ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, PPCImlOptimizerUsedRegisters_t* instructionUsedRegisters) +ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters) { // find free register for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) @@ -1138,7 +299,7 @@ ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageReg return nullptr; } -ppcRecRegisterMapping_t* 
PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask) { // find unloadable register (with lowest lastUseIndex) sint32 unloadIndex = -1; @@ -1179,13 +340,13 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; size_t idx = 0; sint32 currentUseIndex = 0; - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; while (idx < imlSegment->imlList.size()) { IMLInstruction& idxInst = imlSegment->imlList[idx]; if (idxInst.IsSuffixInstruction()) break; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, &idxInst, &registersUsed); + idxInst.CheckRegisterUsage(&registersUsed); sint32 fprMatch[4]; sint32 fprReplace[4]; fprMatch[0] = -1; @@ -1288,7 +449,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon } if (numReplacedOperands > 0) { - PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList.data() + idx, fprMatch, fprReplace); + imlSegment->imlList[idx].ReplaceFPRRegisterUsageMultiple(fprMatch, fprReplace); } // next idx++; @@ -1340,9 +501,8 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG for(size_t i=startIndex; i<imlSegment->imlList.size(); i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - //nameStoreInstruction->op_r_name.registerIndex - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, &registersUsed); + IMLUsedRegisters registersUsed; + imlInstruction->CheckRegisterUsage(&registersUsed); if( registersUsed.readNamedReg1 == registerIndex || registersUsed.readNamedReg2 == registerIndex || registersUsed.readNamedReg3 == registerIndex ) return false; if( registersUsed.writtenNamedReg1 == registerIndex ) @@ -1361,8 +521,8 @@ bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcI for(size_t i=startIndex; i<imlSegment->imlList.size(); i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, &registersUsed); + IMLUsedRegisters registersUsed; + imlInstruction->CheckRegisterUsage(&registersUsed); if( registersUsed.readFPR1 == registerIndex || registersUsed.readFPR2 == registerIndex || registersUsed.readFPR3 == registerIndex || registersUsed.readFPR4 == registerIndex) return false; if( registersUsed.writtenFPR1 == registerIndex ) @@ -1381,8 +541,8 @@ bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcIml for(sint32 i=startIndex; i>=0; i--) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, &registersUsed); + IMLUsedRegisters registersUsed; + imlInstruction->CheckRegisterUsage(&registersUsed); if( registersUsed.writtenNamedReg1 == registerIndex ) { if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) @@ -1440,8 +600,8 @@ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppc for(sint32 i=startIndex; i>=0; i--) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, &registersUsed); + IMLUsedRegisters registersUsed; + imlInstruction->CheckRegisterUsage(&registersUsed); if( registersUsed.writtenFPR1 == 
registerIndex ) { if(imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) @@ -1565,7 +725,7 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext else cemu_assert_unimplemented(); } - else if( PPCRecompilerImlAnalyzer_canTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7 ) + else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) { segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); } @@ -1581,7 +741,7 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext { for (IMLInstruction& instIt : segIt->imlList) { - if( PPCRecompilerImlAnalyzer_canTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7 ) + if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) { uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); @@ -1594,11 +754,11 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) { - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; for (sint32 i = startIndex; i <= endIndex; i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, &registersUsed); + imlInstruction->CheckRegisterUsage(&registersUsed); if (registersUsed.writtenNamedReg1 == vreg) return true; } @@ -1642,13 +802,13 @@ sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcIml currentIndex = startIndex; currentSegment = startSegment; segmentIterateCount = 0; - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; while (true) { while (currentIndex >= 0) { // check if register is modified - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, currentSegment->imlList.data() + currentIndex, &registersUsed); + currentSegment->imlList[currentIndex].CheckRegisterUsage(&registersUsed); if (registersUsed.writtenNamedReg1 == foundRegister) return -1; // check if end of scan reached @@ -1677,7 +837,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI if (imlInstructionLoad->op_storeLoad.flags2.notExpanded) return; - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) bool foundMatch = false; sint32 lastStore = -1; @@ -1709,7 +869,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI } // check if FPR is overwritten (we can actually ignore read operations?) 
- PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, &registersUsed); + imlInstruction->CheckRegisterUsage(&registersUsed); if (registersUsed.writtenFPR1 == fprIndex) break; if (registersUsed.readFPR1 == fprIndex) @@ -1766,7 +926,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false ) return; bool foundMatch = false; - PPCImlOptimizerUsedRegisters_t registersUsed; + IMLUsedRegisters registersUsed; sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) sint32 i = imlIndexLoad + 1; for (; i < scanRangeEnd; i++) @@ -1795,7 +955,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp } } // check if GPR is accessed - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, &registersUsed); + imlInstruction->CheckRegisterUsage(&registersUsed); if (registersUsed.readNamedReg1 == gprIndex || registersUsed.readNamedReg2 == gprIndex || registersUsed.readNamedReg3 == gprIndex) @@ -1930,7 +1090,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) // get GQR value cemu_assert_debug(instIt.op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); - cemu_assert(gqrIndex >= 0); + cemu_assert(gqrIndex >= 0 && gqrIndex < 8); if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) continue; uint32 gqrValue; @@ -1974,7 +1134,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) /* * Returns true if registerWrite overwrites any of the registers read by registerRead */ -bool PPCRecompilerAnalyzer_checkForGPROverwrite(PPCImlOptimizerUsedRegisters_t* registerRead, PPCImlOptimizerUsedRegisters_t* registerWrite) +bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite) { if (registerWrite->writtenNamedReg1 < 0) return false; @@ -1998,7 +1158,7 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment) return; // get CR bitmask of bit required for conditional jump PPCRecCRTracking_t crTracking; - PPCRecompilerImlAnalyzer_getCRTracking(lastInstruction, &crTracking); + IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking); uint32 requiredCRBits = crTracking.readCRBits; // scan backwards until we find the instruction that sets the CR @@ -2007,7 +1167,7 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment) for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - PPCRecompilerImlAnalyzer_getCRTracking(imlInstruction, &crTracking); + IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking); if (crTracking.readCRBits != 0) return; // dont handle complex cases for now if (crTracking.writtenCRBits != 0) @@ -2039,17 +1199,17 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment) return; // no danger of overwriting eflags, don't reorder // check if we can move the CR setter instruction to after unsafeInstructionIndex PPCRecCRTracking_t crTrackingSetter = crTracking; - PPCImlOptimizerUsedRegisters_t regTrackingCRSetter; - PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + crSetterInstructionIndex, &regTrackingCRSetter); + IMLUsedRegisters regTrackingCRSetter; + 
imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(&regTrackingCRSetter); if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) return; // we don't handle FPR dependency yet so just ignore FPR instructions - PPCImlOptimizerUsedRegisters_t registerTracking; + IMLUsedRegisters registerTracking; if (regTrackingCRSetter.writtenNamedReg1 >= 0) { // CR setter does write GPR for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) { - PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + i, &registerTracking); + imlSegment->imlList[i].CheckRegisterUsage(&registerTracking); // reads register written by CR setter? if (PPCRecompilerAnalyzer_checkForGPROverwrite(&registerTracking, &regTrackingCRSetter)) { @@ -2070,7 +1230,7 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment) // CR setter does not write GPR for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) { - PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + i, &registerTracking); + imlSegment->imlList[i].CheckRegisterUsage(&registerTracking); // writes register read by CR setter? if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking)) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp index afe6d943c..f162e0238 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp @@ -1,10 +1,10 @@ +#include "./IML/IML.h" + #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "PPCRecompilerX64.h" #include "PPCRecompilerImlRanges.h" -void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); - uint32 recRACurrentIterationIndex = 0; uint32 PPCRecRA_getNextIterationIndex() @@ -831,8 +831,8 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // replace registers if (index < imlSegment->imlList.size()) { - PPCImlOptimizerUsedRegisters_t gprTracking; - PPCRecompiler_checkRegisterUsage(nullptr, imlSegment->imlList.data() + index, &gprTracking); + IMLUsedRegisters gprTracking; + imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); sint32 inputGpr[4]; inputGpr[0] = gprTracking.gpr[0]; @@ -853,7 +853,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, replaceGpr[f] = virtualReg2PhysReg[virtualRegister]; cemu_assert_debug(replaceGpr[f] >= 0); } - PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList.data() + index, inputGpr, replaceGpr); + imlSegment->imlList[index].ReplaceGPRRegisterUsageMultiple(inputGpr, replaceGpr); } // next iml instruction index++; @@ -1014,14 +1014,14 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, } // scan instructions for usage range size_t index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; + IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { // end loop at suffix instruction if (imlSegment->imlList[index].IsSuffixInstruction()) break; // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList.data() + index, &gprTracking); + 
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); for (sint32 t = 0; t < 4; t++) { sint32 virtualRegister = gprTracking.gpr[t]; @@ -1107,14 +1107,14 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, } // parse instructions and convert to locations size_t index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; + IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { // end loop at suffix instruction if (imlSegment->imlList[index].IsSuffixInstruction()) break; // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList.data() + index, &gprTracking); + imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); // handle accessed GPR for (sint32 t = 0; t < 4; t++) { From e53c6ad2e9fd2ddbd9a66ac824b8cce7f84513bc Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 5 Nov 2022 22:06:24 +0100 Subject: [PATCH 08/64] PPCRec: Move IML optimizer file --- src/Cafe/CMakeLists.txt | 2 +- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 10 ++++ .../Recompiler/IML/IMLInstruction.cpp | 6 +-- .../Espresso/Recompiler/IML/IMLInstruction.h | 6 +-- .../IMLOptimizer.cpp} | 50 ++++--------------- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 13 ----- .../Recompiler/PPCRecompilerImlGen.cpp | 1 - .../PPCRecompilerImlRegisterAllocator.cpp | 2 +- 8 files changed, 27 insertions(+), 63 deletions(-) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerImlOptimizer.cpp => IML/IMLOptimizer.cpp} (96%) diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index bbe28a9a1..380e45971 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -74,10 +74,10 @@ add_library(CemuCafe HW/Espresso/Recompiler/IML/IMLInstruction.h HW/Espresso/Recompiler/IML/IMLDebug.cpp HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp + HW/Espresso/Recompiler/IML/IMLOptimizer.cpp HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp HW/Espresso/Recompiler/PPCRecompilerIml.h - HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp HW/Espresso/Recompiler/PPCRecompilerImlRanges.h HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 6619e75a6..b991b3595 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -14,6 +14,16 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment); bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction); void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking); +// optimizer passes +// todo - rename +bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); +bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext); + // debug void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git 
a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 1a0d8c559..46ed886d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -434,7 +434,7 @@ sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) return reg; } -void IMLInstruction::ReplaceGPRRegisterUsageMultiple(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) +void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) { if (type == PPCREC_IML_TYPE_R_NAME) { @@ -609,7 +609,7 @@ void IMLInstruction::ReplaceGPRRegisterUsageMultiple(sint32 gprRegisterSearched[ } } -void IMLInstruction::ReplaceFPRRegisterUsageMultiple(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) +void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) { if (type == PPCREC_IML_TYPE_R_NAME) { @@ -727,7 +727,7 @@ void IMLInstruction::ReplaceFPRRegisterUsageMultiple(sint32 fprRegisterSearched[ } } -void IMLInstruction::ReplaceFPRRegisterUsage(sint32 fprRegisterSearched, sint32 fprRegisterReplaced) +void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced) { if (type == PPCREC_IML_TYPE_R_NAME) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index b72e7a67a..18cf580d3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -440,7 +440,7 @@ struct IMLInstruction void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; - void ReplaceGPRRegisterUsageMultiple(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); - void ReplaceFPRRegisterUsageMultiple(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]); - void ReplaceFPRRegisterUsage(sint32 fprRegisterSearched, sint32 fprRegisterReplaced); + void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); + void ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]); + void ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced); }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp similarity index 96% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp rename to src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 4a7115c01..fc39d3f75 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -1,11 +1,12 @@ -#include "../Interpreter/PPCInterpreterInternal.h" +#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" #include "Cafe/HW/Espresso/Recompiler/IML/IML.h" #include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h" -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -typedef struct +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" +#include "../PPCRecompilerX64.h" + +struct replacedRegisterTracker_t { struct { @@ -17,40 +18,7 @@ typedef struct bool nameMustBeMaintained; // must be stored before replacement and loaded after replacement ends }replacedRegisterEntry[PPC_X64_GPR_USABLE_REGISTERS]; sint32 count; -}replacedRegisterTracker_t; - -bool PPCRecompiler_checkIfGPRRegisterIsAccessed(IMLUsedRegisters* registersUsed, sint32 gprRegister) -{ - 
if( registersUsed->readNamedReg1 == gprRegister ) - return true; - if( registersUsed->readNamedReg2 == gprRegister ) - return true; - if( registersUsed->readNamedReg3 == gprRegister ) - return true; - if( registersUsed->writtenNamedReg1 == gprRegister ) - return true; - return false; -} - -/* - * Returns index of register to replace - * If no register needs to be replaced, -1 is returned - */ -sint32 PPCRecompiler_getNextRegisterToReplace(IMLUsedRegisters* registersUsed) -{ - // get index of register to replace - sint32 gprToReplace = -1; - if( registersUsed->readNamedReg1 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->readNamedReg1; - else if( registersUsed->readNamedReg2 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->readNamedReg2; - else if( registersUsed->readNamedReg3 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->readNamedReg3; - else if( registersUsed->writtenNamedReg1 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->writtenNamedReg1; - // return - return gprToReplace; -} +}; bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed) { @@ -216,7 +184,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; } // replace registers that are out of range - segIt->imlList[imlIndex].ReplaceFPRRegisterUsage(fprToReplace, unusedRegisterIndex); + segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); // add load/store name after instruction PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex+1, 2); // add load/store before current instruction @@ -449,7 +417,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon } if (numReplacedOperands > 0) { - imlSegment->imlList[idx].ReplaceFPRRegisterUsageMultiple(fprMatch, fprReplace); + imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace); } // next idx++; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 1af1ee087..d9706c13d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -122,19 +122,6 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont IMLInstruction* PPCRecompilerIML_getLastInstruction(IMLSegment* imlSegment); -// IML optimizer -bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); - -bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); - -void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext); - -void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext); - // IML register allocator void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext); -// late optimizations -void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 4e91bbe63..c787fadc1 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -3881,7 +3881,6 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses) { - //ppcImlGenContext_t ppcImlGenContext = { 0 }; ppcImlGenContext.functionRef = ppcRecFunc; // add entire range ppcRecRange_t recRange; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp index f162e0238..183e943e8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp @@ -853,7 +853,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, replaceGpr[f] = virtualReg2PhysReg[virtualRegister]; cemu_assert_debug(replaceGpr[f] >= 0); } - imlSegment->imlList[index].ReplaceGPRRegisterUsageMultiple(inputGpr, replaceGpr); + imlSegment->imlList[index].ReplaceGPR(inputGpr, replaceGpr); } // next iml instruction index++; From d1fe1a905f174a32d1e042b9965b28c8a6b1e980 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sun, 6 Nov 2022 12:39:30 +0100 Subject: [PATCH 09/64] PPCRec: Move IML register allocator --- bin/keys.txt | 4 + src/Cafe/CMakeLists.txt | 6 +- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 3 + .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 2 +- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 4 +- .../IMLRegisterAllocator.cpp} | 18 ++-- .../IMLRegisterAllocatorRanges.cpp} | 8 +- .../IMLRegisterAllocatorRanges.h} | 0 .../HW/Espresso/Recompiler/IML/IMLSegment.cpp | 88 +++++++++++++++++ .../HW/Espresso/Recompiler/IML/IMLSegment.h | 8 ++ .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 9 -- .../Recompiler/PPCRecompilerImlGen.cpp | 32 +++--- .../Recompiler/PPCRecompilerIntermediate.cpp | 99 ++----------------- 13 files changed, 145 insertions(+), 136 deletions(-) create mode 100644 bin/keys.txt rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerImlRegisterAllocator.cpp => IML/IMLRegisterAllocator.cpp} (99%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerImlRanges.cpp => IML/IMLRegisterAllocatorRanges.cpp} (99%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerImlRanges.h => IML/IMLRegisterAllocatorRanges.h} (100%) diff --git a/bin/keys.txt b/bin/keys.txt new file mode 100644 index 000000000..8782dbfe7 --- /dev/null +++ b/bin/keys.txt @@ -0,0 +1,4 @@ +# this file contains keys needed for decryption of disc file system data (WUD/WUX) +# 1 key per line, any text after a '#' character is considered a comment +# the emulator will automatically pick the right key +541b9889519b27d363cd21604b97c67a # example key (can be deleted) diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 380e45971..db6bce024 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -75,12 +75,12 @@ add_library(CemuCafe HW/Espresso/Recompiler/IML/IMLDebug.cpp HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp HW/Espresso/Recompiler/IML/IMLOptimizer.cpp + HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp + HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp + HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp HW/Espresso/Recompiler/PPCRecompilerIml.h - 
HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp - HW/Espresso/Recompiler/PPCRecompilerImlRanges.h - HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index b991b3595..4bee5c5dd 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -24,6 +24,9 @@ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenCont void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext); +// register allocator +void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext); + // debug void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index b8094bb8e..c64fc5130 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -1,9 +1,9 @@ #include "IML.h" #include "IMLInstruction.h" #include "IMLSegment.h" +#include "IMLRegisterAllocatorRanges.h" #include "util/helpers/StringBuf.h" -#include "Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h" const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index fc39d3f75..88463447f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -1118,8 +1118,8 @@ bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, void _reorderConditionModifyInstructions(IMLSegment* imlSegment) { - IMLInstruction* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment); - // last instruction a conditional branch? + IMLInstruction* lastInstruction = imlSegment->GetLastInstruction(); + // last instruction is a conditional branch? 
if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) return; if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp rename to src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 183e943e8..cf309c4ea 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1,9 +1,9 @@ -#include "./IML/IML.h" +#include "IML.h" -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" +#include "../PPCRecompilerX64.h" +#include "IMLRegisterAllocatorRanges.h" uint32 recRACurrentIterationIndex = 0; @@ -964,9 +964,9 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0]; IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1]; IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken; - PPCRecompilerIML_removeLink(imlSegmentP0, nextSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP1, nextSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + IMLSegment_RemoveLink(imlSegmentP0, nextSegment); + IMLSegment_SetLinkBranchNotTaken(imlSegmentP1, nextSegment); + IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); segmentIndex++; } // detect loops @@ -982,7 +982,7 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen } } -void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext) +void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext) { PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp rename to src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 176f2034d..64a0966e0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -1,7 +1,7 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" +#include "../PPCRecompilerX64.h" +#include "IMLRegisterAllocatorRanges.h" #include "util/helpers/MemoryPool.h" void PPCRecRARange_addLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h similarity index 100% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h rename to src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp index e7eb3b323..4882a0a15 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ 
-8,3 +8,91 @@ bool IMLSegment::HasSuffixInstruction() const const IMLInstruction& imlInstruction = imlList.back(); return imlInstruction.IsSuffixInstruction(); } + +IMLInstruction* IMLSegment::GetLastInstruction() +{ + if (imlList.empty()) + return nullptr; + return &imlList.back(); +} + + + + +void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) +{ + // make sure segments aren't already linked + if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) + return; + // add as next segment for source + if (imlSegmentSrc->nextSegmentBranchNotTaken != nullptr) + assert_dbg(); + imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst; + // add as previous segment for destination + imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); +} + +void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) +{ + // make sure segments aren't already linked + if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) + return; + // add as next segment for source + if (imlSegmentSrc->nextSegmentBranchTaken != nullptr) + assert_dbg(); + imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst; + // add as previous segment for destination + imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); +} + +void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) +{ + if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) + { + imlSegmentSrc->nextSegmentBranchNotTaken = nullptr; + } + else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) + { + imlSegmentSrc->nextSegmentBranchTaken = nullptr; + } + else + assert_dbg(); + + bool matchFound = false; + for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++) + { + if (imlSegmentDst->list_prevSegments[i] == imlSegmentSrc) + { + imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin() + i); + matchFound = true; + break; + } + } + if (matchFound == false) + assert_dbg(); +} + +/* + * Replaces all links to segment orig with linkts to segment new + */ +void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew) +{ + while (imlSegmentOrig->list_prevSegments.size() != 0) + { + IMLSegment* prevSegment = imlSegmentOrig->list_prevSegments[0]; + if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig) + { + IMLSegment_RemoveLink(prevSegment, imlSegmentOrig); + IMLSegment_SetLinkBranchNotTaken(prevSegment, imlSegmentNew); + } + else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig) + { + IMLSegment_RemoveLink(prevSegment, imlSegmentOrig); + IMLSegment_SetLinkBranchTaken(prevSegment, imlSegmentNew); + } + else + { + assert_dbg(); + } + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 216e17488..f9fccb0ef 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -42,4 +42,12 @@ struct IMLSegment ppcRecompilerSegmentPoint_t* segmentPointList{}; bool HasSuffixInstruction() const; + IMLInstruction* GetLastInstruction(); + }; + + +void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); +void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); +void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew); +void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h 
b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index d9706c13d..1db1963f3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -114,14 +114,5 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerIml_setLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); -void PPCRecompilerIml_setLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); -void PPCRecompilerIML_relinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew); -void PPCRecompilerIML_removeLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); -IMLInstruction* PPCRecompilerIML_getLastInstruction(IMLSegment* imlSegment); - -// IML register allocator -void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext); - diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index c787fadc1..b5897032f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2,8 +2,8 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" -#include "PPCRecompilerImlRanges.h" #include "IML/IML.h" +#include "IML/IMLRegisterAllocatorRanges.h" bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); @@ -4149,7 +4149,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext { if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) continue; // not a branching segment - IMLInstruction* lastInstruction = PPCRecompilerIML_getLastInstruction(segIt); + IMLInstruction* lastInstruction = segIt->GetLastInstruction(); if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) continue; IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; @@ -4195,10 +4195,10 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // update segment links // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment - PPCRecompilerIML_removeLink(segIt, conditionalSegment); - PPCRecompilerIML_removeLink(segIt, finalSegment); - PPCRecompilerIML_removeLink(conditionalSegment, finalSegment); - PPCRecompilerIml_setLinkBranchNotTaken(segIt, finalSegment); + IMLSegment_RemoveLink(segIt, conditionalSegment); + IMLSegment_RemoveLink(segIt, finalSegment); + IMLSegment_RemoveLink(conditionalSegment, finalSegment); + IMLSegment_SetLinkBranchNotTaken(segIt, finalSegment); // remove all instructions from conditional segment conditionalSegment->imlList.clear(); @@ -4206,18 +4206,18 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) { // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() - PPCRecompilerIML_removeLink(segIt, finalSegment); + IMLSegment_RemoveLink(segIt, finalSegment); if (finalSegment->nextSegmentBranchNotTaken) { IMLSegment* tempSegment = 
finalSegment->nextSegmentBranchNotTaken; - PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchNotTaken(segIt, tempSegment); + IMLSegment_RemoveLink(finalSegment, tempSegment); + IMLSegment_SetLinkBranchNotTaken(segIt, tempSegment); } if (finalSegment->nextSegmentBranchTaken) { IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; - PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchTaken(segIt, tempSegment); + IMLSegment_RemoveLink(finalSegment, tempSegment); + IMLSegment_SetLinkBranchTaken(segIt, tempSegment); } // copy IML instructions cemu_assert_debug(segIt != finalSegment); @@ -4296,10 +4296,10 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1]; // relink segments - PPCRecompilerIML_relinkInputSegment(imlSegmentP2, imlSegmentP0); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - PPCRecompilerIml_setLinkBranchTaken(imlSegmentP0, imlSegmentP2); - PPCRecompilerIml_setLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); + IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); + IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); + IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); // update segments uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; if (imlSegmentP2->isEnterable) @@ -4413,7 +4413,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } } - PPCRecompilerImm_allocateRegisters(&ppcImlGenContext); + IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext); // remove redundant name load and store instructions PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index 3d6013adb..ccb0fc83f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -11,85 +11,7 @@ IMLSegment* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlG } } debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); - return NULL; -} - -void PPCRecompilerIml_setLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) -{ - // make sure segments aren't already linked - if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) - return; - // add as next segment for source - if (imlSegmentSrc->nextSegmentBranchNotTaken != NULL) - assert_dbg(); - imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst; - // add as previous segment for destination - imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); -} - -void PPCRecompilerIml_setLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) -{ - // make sure segments aren't already linked - if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) - return; - // add as next segment for source - if (imlSegmentSrc->nextSegmentBranchTaken != NULL) - assert_dbg(); - imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst; - // add as previous segment for destination - imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); -} - -void PPCRecompilerIML_removeLink(IMLSegment* 
imlSegmentSrc, IMLSegment* imlSegmentDst) -{ - if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) - { - imlSegmentSrc->nextSegmentBranchNotTaken = NULL; - } - else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) - { - imlSegmentSrc->nextSegmentBranchTaken = NULL; - } - else - assert_dbg(); - - bool matchFound = false; - for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++) - { - if (imlSegmentDst->list_prevSegments[i] == imlSegmentSrc) - { - imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin()+i); - matchFound = true; - break; - } - } - if (matchFound == false) - assert_dbg(); -} - -/* - * Replaces all links to segment orig with linkts to segment new - */ -void PPCRecompilerIML_relinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew) -{ - while (imlSegmentOrig->list_prevSegments.size() != 0) - { - IMLSegment* prevSegment = imlSegmentOrig->list_prevSegments[0]; - if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig) - { - PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig); - PPCRecompilerIml_setLinkBranchNotTaken(prevSegment, imlSegmentNew); - } - else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig) - { - PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig); - PPCRecompilerIml_setLinkBranchTaken(prevSegment, imlSegmentNew); - } - else - { - assert_dbg(); - } - } + return nullptr; } void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) @@ -105,7 +27,7 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) if( imlSegment->imlList.empty()) { if (isLastSegment == false) - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment + IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment else imlSegment->nextSegmentIsUncertain = true; continue; @@ -119,8 +41,8 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) if( jumpDestSegment ) { if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment); - PPCRecompilerIml_setLinkBranchTaken(imlSegment, jumpDestSegment); + IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); } else { @@ -135,7 +57,7 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) else { // all other instruction types do not branch - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment); + IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); } } } @@ -156,17 +78,10 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont // create jump instruction PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1); PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList.data() + 0); - PPCRecompilerIml_setLinkBranchTaken(entrySegment, imlSegment); + IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment); // remove enterable flag from original segment imlSegment->isEnterable = false; imlSegment->enterPPCAddress = 0; } } -} - -IMLInstruction* PPCRecompilerIML_getLastInstruction(IMLSegment* imlSegment) -{ - if (imlSegment->imlList.empty()) - return nullptr; - return imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); -} +} \ No newline at end of file From 27f70d5f34913a970cfa3c483651a8f7e88a541f Mon Sep 17 00:00:00 2001 From: 
Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 7 Nov 2022 03:23:46 +0100 Subject: [PATCH 10/64] PPCRec: Emit x86 movd for non-AVX + more restructuring --- src/Cafe/CMakeLists.txt | 1 + src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 16 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 1 + .../Recompiler/IML/IMLRegisterAllocator.cpp | 22 +- .../Recompiler/IML/IMLRegisterAllocator.h | 0 .../HW/Espresso/Recompiler/IML/IMLSegment.h | 83 ++- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 128 ++-- .../HW/Espresso/Recompiler/PPCRecompiler.h | 95 +-- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 5 +- .../Recompiler/PPCRecompilerImlGen.cpp | 109 +--- .../Espresso/Recompiler/PPCRecompilerX64.cpp | 595 ++++++++++-------- .../HW/Espresso/Recompiler/PPCRecompilerX64.h | 6 + .../Recompiler/PPCRecompilerX64FPU.cpp | 74 +-- 13 files changed, 570 insertions(+), 565 deletions(-) create mode 100644 src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index db6bce024..8c2dd5f4f 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -76,6 +76,7 @@ add_library(CemuCafe HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp HW/Espresso/Recompiler/IML/IMLOptimizer.cpp HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp + HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 4bee5c5dd..06f398156 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -16,16 +16,16 @@ void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_ // optimizer passes // todo - rename -bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); -bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext); +bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); +bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_removeRedundantCRUpdates(struct ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext); // register allocator -void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext); +void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext); // debug void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp 
index c64fc5130..69d8e1b7d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -4,6 +4,7 @@ #include "IMLRegisterAllocatorRanges.h" #include "util/helpers/StringBuf.h" +#include "../PPCRecompiler.h" const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index cf309c4ea..a75f634fd 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -750,8 +750,8 @@ void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange) void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - sint16 virtualReg2PhysReg[PPC_REC_MAX_VIRTUAL_GPR]; - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX]; + for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) virtualReg2PhysReg[i] = -1; raLiveRangeInfo_t liveInfo; @@ -848,7 +848,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, replaceGpr[f] = -1; continue; } - if (virtualRegister >= PPC_REC_MAX_VIRTUAL_GPR) + if (virtualRegister >= IML_RA_VIRT_REG_COUNT_MAX) assert_dbg(); replaceGpr[f] = virtualReg2PhysReg[virtualRegister]; cemu_assert_debug(replaceGpr[f] >= 0); @@ -860,7 +860,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, } // expire infinite subranges (subranges that cross the segment border) sint32 storeLoadListLength = 0; - raLoadStoreInfo_t loadStoreList[PPC_REC_MAX_VIRTUAL_GPR]; + raLoadStoreInfo_t loadStoreList[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) { raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; @@ -1007,7 +1007,7 @@ bool _isRangeDefined(IMLSegment* imlSegment, sint32 vGPR) void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) { imlSegment->raDistances.reg[i].usageStart = INT_MAX; imlSegment->raDistances.reg[i].usageEnd = INT_MIN; @@ -1027,7 +1027,7 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 virtualRegister = gprTracking.gpr[t]; if (virtualRegister < 0) continue; - cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR); + cemu_assert_debug(virtualRegister < IML_RA_VIRT_REG_COUNT_MAX); imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction } @@ -1086,7 +1086,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) { if (_isRangeDefined(imlSegment, i) == false) continue; @@ -1096,8 +1096,8 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); } // create lookup table of ranges - raLivenessSubrange_t* 
vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR]; - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + raLivenessSubrange_t* vGPR2Subrange[IML_RA_VIRT_REG_COUNT_MAX]; + for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) { vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i]; #ifdef CEMU_DEBUG_ASSERT @@ -1257,7 +1257,7 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries { if (imlSegment->raDistances.reg[i].usageStart == INT_MAX) continue; // not used @@ -1334,7 +1334,7 @@ void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) continue; // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries { if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) continue; // range not set or does not reach end of segment diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h new file mode 100644 index 000000000..e69de29bb diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index f9fccb0ef..1e27d303a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,7 +1,84 @@ #pragma once #include "IMLInstruction.h" -#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h" // remove once dependency is gone +#define IML_RA_VIRT_REG_COUNT_MAX 40 // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic + +struct IMLSegmentPoint +{ + sint32 index; + struct IMLSegment* imlSegment; + IMLSegmentPoint* next; + IMLSegmentPoint* prev; +}; + +struct raLivenessLocation_t +{ + sint32 index; + bool isRead; + bool isWrite; + + raLivenessLocation_t() = default; + + raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) + : index(index), isRead(isRead), isWrite(isWrite) {}; +}; + +struct raLivenessSubrangeLink_t +{ + struct raLivenessSubrange_t* prev; + struct raLivenessSubrange_t* next; +}; + +struct raLivenessSubrange_t +{ + struct raLivenessRange_t* range; + IMLSegment* imlSegment; + IMLSegmentPoint start; + IMLSegmentPoint end; + // dirty state tracking + bool _noLoad; + bool hasStore; + bool hasStoreDelayed; + // next + raLivenessSubrange_t* subrangeBranchTaken; + raLivenessSubrange_t* subrangeBranchNotTaken; + // processing + uint32 lastIterationIndex; + // instruction locations + std::vector list_locations; + // linked list (subranges with same GPR virtual register) + raLivenessSubrangeLink_t link_sameVirtualRegisterGPR; + // linked list (all subranges for this segment) + raLivenessSubrangeLink_t link_segmentSubrangesGPR; +}; + +struct raLivenessRange_t +{ + sint32 virtualRegister; + sint32 physicalRegister; + sint32 name; + std::vector list_subranges; +}; + +struct PPCSegmentRegisterAllocatorInfo_t +{ + // 
analyzer stage + bool isPartOfProcessedLoop{}; // used during loop detection + sint32 lastIterationIndex{}; + // linked lists + raLivenessSubrange_t* linkedList_allSubranges{}; + raLivenessSubrange_t* linkedList_perVirtualGPR[IML_RA_VIRT_REG_COUNT_MAX]{}; +}; + +struct PPCRecVGPRDistances_t +{ + struct _RegArrayEntry + { + sint32 usageStart{}; + sint32 usageEnd{}; + }reg[IML_RA_VIRT_REG_COUNT_MAX]; + bool isProcessed[IML_RA_VIRT_REG_COUNT_MAX]{}; +}; struct IMLSegment { @@ -39,11 +116,9 @@ struct IMLSegment PPCRecVGPRDistances_t raDistances{}; bool raRangeExtendProcessed{}; // segment points - ppcRecompilerSegmentPoint_t* segmentPointList{}; - + IMLSegmentPoint* segmentPointList{}; bool HasSuffixInstruction() const; IMLInstruction* GetLastInstruction(); - }; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 09f10956a..6c3cbde3b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -14,6 +14,8 @@ #include "util/helpers/helpers.h" #include "util/MemMapper/MemMapper.h" +#include "Cafe/HW/Espresso/Recompiler/IML/IML.h" + struct PPCInvalidationRange { MPTR startAddress; @@ -127,6 +129,7 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress) PPCRecompiler_enter(hCPU, funcPtr); } } +bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext); PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut) { @@ -153,21 +156,27 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t(); ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; + // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses); if (compiledSuccessfully == false) { - // todo: Free everything - PPCRecompiler_freeContext(&ppcImlGenContext); delete ppcRecFunc; - return NULL; + return nullptr; } + + // apply passes + if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext)) + { + delete ppcRecFunc; + return nullptr; + } + // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); if (x64GenerationSuccess == false) { - PPCRecompiler_freeContext(&ppcImlGenContext); return nullptr; } @@ -183,11 +192,82 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } - - PPCRecompiler_freeContext(&ppcImlGenContext); return ppcRecFunc; } +void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext); + +bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) +{ + PPCRecompiler_FixLoops(ppcImlGenContext); + + // isolate entry points from function flow (enterable segments must not be the target of any other segment) + // this simplifies logic during register allocation + PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); + + // if GQRs can be predicted, optimize PSQ load/stores + PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); + + // count number of used registers + uint32 numLoadedFPRRegisters = 0; + for (uint32 i = 0; i < 255; i++) + { + if (ppcImlGenContext.mappedFPRRegister[i]) + numLoadedFPRRegisters++; + } + + // insert name store instructions at the end of each segment but before 
branch instructions + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + if (segIt->imlList.size() == 0) + continue; // ignore empty segments + // analyze segment for register usage + IMLUsedRegisters registersUsed; + for (sint32 i = 0; i < segIt->imlList.size(); i++) + { + segIt->imlList[i].CheckRegisterUsage(&registersUsed); + sint32 accessedTempReg[5]; + // intermediate FPRs + accessedTempReg[0] = registersUsed.readFPR1; + accessedTempReg[1] = registersUsed.readFPR2; + accessedTempReg[2] = registersUsed.readFPR3; + accessedTempReg[3] = registersUsed.readFPR4; + accessedTempReg[4] = registersUsed.writtenFPR1; + for (sint32 f = 0; f < 5; f++) + { + if (accessedTempReg[f] == -1) + continue; + uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]]; + if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) + { + segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; + } + } + } + } + + // merge certain float load+store patterns (must happen before FPR register remapping) + PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext); + // delay byte swapping for certain load+store patterns + PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext); + + if (numLoadedFPRRegisters > 0) + { + if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) + { + return false; + } + } + + IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext); + + // remove redundant name load and store instructions + PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); + PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); + + return true; +} + bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector>& entryPoints) { // update jump table @@ -511,42 +591,6 @@ void PPCRecompiler_init() PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize()); PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize()); - // init x64 recompiler instance data - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL; - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL; - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL; - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL; - ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL; - ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL; - ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL); - ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL; - ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL); - ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL); - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31); - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF; - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF; - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF; - ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL; - ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL; - ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0; - ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0; - ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0; - ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0; -
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f; - ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f; - ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f; - ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f; - *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000; - *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000; - // setup GQR scale tables for (uint32 i = 0; i < 32; i++) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 88bd1d946..e943d8d37 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -25,84 +25,6 @@ struct PPCRecFunction_t }; #include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h" - -typedef struct _ppcRecompilerSegmentPoint_t -{ - sint32 index; - struct IMLSegment* imlSegment; - _ppcRecompilerSegmentPoint_t* next; - _ppcRecompilerSegmentPoint_t* prev; -}ppcRecompilerSegmentPoint_t; - -struct raLivenessLocation_t -{ - sint32 index; - bool isRead; - bool isWrite; - - raLivenessLocation_t() = default; - - raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) - : index(index), isRead(isRead), isWrite(isWrite) {}; -}; - -struct raLivenessSubrangeLink_t -{ - struct raLivenessSubrange_t* prev; - struct raLivenessSubrange_t* next; -}; - -struct raLivenessSubrange_t -{ - struct raLivenessRange_t* range; - IMLSegment* imlSegment; - ppcRecompilerSegmentPoint_t start; - ppcRecompilerSegmentPoint_t end; - // dirty state tracking - bool _noLoad; - bool hasStore; - bool hasStoreDelayed; - // next - raLivenessSubrange_t* subrangeBranchTaken; - raLivenessSubrange_t* subrangeBranchNotTaken; - // processing - uint32 lastIterationIndex; - // instruction locations - std::vector list_locations; - // linked list (subranges with same GPR virtual register) - raLivenessSubrangeLink_t link_sameVirtualRegisterGPR; - // linked list (all subranges for this segment) - raLivenessSubrangeLink_t link_segmentSubrangesGPR; -}; - -struct raLivenessRange_t -{ - sint32 virtualRegister; - sint32 physicalRegister; - sint32 name; - std::vector list_subranges; -}; - -struct PPCSegmentRegisterAllocatorInfo_t -{ - // analyzer stage - bool isPartOfProcessedLoop{}; // used during loop detection - sint32 lastIterationIndex{}; - // linked lists - raLivenessSubrange_t* linkedList_allSubranges{}; - raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{}; -}; - -struct PPCRecVGPRDistances_t -{ - struct _RegArrayEntry - { - sint32 usageStart{}; - sint32 usageEnd{}; - }reg[PPC_REC_MAX_VIRTUAL_GPR]; - bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{}; -}; - #include "Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h" struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct ppcImlGenContext_t* ppcImlGenContext); @@ -140,6 +62,21 @@ struct 
ppcImlGenContext_t bool modifiesGQR[8]; }tracking; + ~ppcImlGenContext_t() + { + if (imlList) + { + free(imlList); + imlList = nullptr; + } + + for (IMLSegment* imlSegment : segmentList2) + { + delete imlSegment; + } + segmentList2.clear(); + } + // append raw instruction IMLInstruction& emitInst() { @@ -194,8 +131,6 @@ extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)(); #define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1) -// todo - move some of the stuff above into PPCRecompilerInternal.h - // recompiler interface void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 1db1963f3..3b8783f50 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -2,7 +2,6 @@ #define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example) bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses); -void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount); @@ -10,8 +9,8 @@ IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 i void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count); -void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, IMLSegment* imlSegment, sint32 index); -void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint); +void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index); +void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint); // GPR register management uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index b5897032f..a1cb6f2e2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2933,7 +2933,7 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext return v; } -void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, IMLSegment* imlSegment, sint32 index) +void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index) { segmentPoint->imlSegment = imlSegment; segmentPoint->index = index; @@ -2944,7 +2944,7 @@ void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, imlSegment->segmentPointList = segmentPoint; } -void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint) +void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint) { if (segmentPoint->prev) segmentPoint->prev->next = segmentPoint->next; @@ -2975,7 +2975,7 @@ void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, // update position of segment points if (imlSegment->segmentPointList) { - ppcRecompilerSegmentPoint_t* segmentPoint = 
imlSegment->segmentPointList; + IMLSegmentPoint* segmentPoint = imlSegment->segmentPointList; while (segmentPoint) { if (segmentPoint->index != RA_INTER_RANGE_START && segmentPoint->index != RA_INTER_RANGE_END) @@ -3017,21 +3017,6 @@ void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint3 ppcImlGenContext->segmentList2[index + i] = new IMLSegment(); } -void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) -{ - if (ppcImlGenContext->imlList) - { - free(ppcImlGenContext->imlList); - ppcImlGenContext->imlList = nullptr; - } - - for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2) - { - delete imlSegment; - } - ppcImlGenContext->segmentList2.clear(); -} - bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) { bool unsupportedInstructionFound = false; @@ -3953,9 +3938,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) { - // could not compile function debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); - PPCRecompiler_freeContext(&ppcImlGenContext); return false; } // optimize unused jumpmarks away @@ -4260,16 +4243,20 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext segIt->imlList[0].op_macro.param = cycleCount; } } + return true; +} +void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext) +{ // find segments that have a (conditional) jump instruction that points in reverse direction of code flow // for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. // todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located - for(size_t s=0; sppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) 
IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; - if( imlSegment->imlList.empty() ) + if (imlSegment->imlList.empty()) continue; if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) continue; @@ -4289,12 +4276,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); imlSegment = NULL; - IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s+0]; - IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s+1]; - IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s+2]; + IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; + IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; + IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; // create entry point segment PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); - IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1]; + IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; // relink segments IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); @@ -4322,7 +4309,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext imlSegmentP2->ppcAddrMin = 0; imlSegmentP2->ppcAddrMax = 0; // setup enterable segment - if( enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF ) + if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) { imlSegmentPEntry->isEnterable = true; imlSegmentPEntry->ppcAddress = enterPPCAddress; @@ -4353,70 +4340,4 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // skip the newly created segments s += 2; } - - // isolate entry points from function flow (enterable segments must not be the target of any other segment) - // this simplifies logic during register allocation - PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); - - // if GQRs can be predicted, optimize PSQ load/stores - PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - - // count number of used registers - uint32 numLoadedFPRRegisters = 0; - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext.mappedFPRRegister[i] ) - numLoadedFPRRegisters++; - } - - // insert name store instructions at the end of each segment but before branch instructions - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if(segIt->imlList.size() == 0 ) - continue; // ignore empty segments - // analyze segment for register usage - IMLUsedRegisters registersUsed; - for(sint32 i=0; iimlList.size(); i++) - { - segIt->imlList[i].CheckRegisterUsage(®istersUsed); - sint32 accessedTempReg[5]; - // intermediate FPRs - accessedTempReg[0] = registersUsed.readFPR1; - accessedTempReg[1] = registersUsed.readFPR2; - accessedTempReg[2] = registersUsed.readFPR3; - accessedTempReg[3] = registersUsed.readFPR4; - accessedTempReg[4] = registersUsed.writtenFPR1; - for(sint32 f=0; f<5; f++) - { - if( accessedTempReg[f] == -1 ) - continue; - uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]]; - if( regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0+32 ) - { - segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; - } - } - } - } - - // merge certain float load+store patterns (must happen before FPR register 
remapping) - PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext); - // delay byte swapping for certain load+store patterns - PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext); - - if (numLoadedFPRRegisters > 0) - { - if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) - { - PPCRecompiler_freeContext(&ppcImlGenContext); - return false; - } - } - - IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext); - - // remove redundant name load and store instructions - PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); - PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); - return true; -} +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp index 6a3dd39d0..c9b913ab3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp @@ -8,6 +8,11 @@ #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" +bool s_hasLZCNTSupport = false; +bool s_hasMOVBESupport = false; +bool s_hasBMI2Support = false; +bool s_hasAVXSupport = false; + sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping { REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX @@ -351,152 +356,143 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); - if( false )//imlInstruction->op_storeLoad.flags & PPCREC_IML_OP_FLAG_FASTMEMACCESS ) + if( indexed && realRegisterMem == realRegisterMem2 ) { - // load u8/u16/u32 via direct memory access + optional sign extend - assert_dbg(); // todo + return false; } - else + if( indexed && realRegisterData == realRegisterMem2 ) { - if( indexed && realRegisterMem == realRegisterMem2 ) - { - return false; - } - if( indexed && realRegisterData == realRegisterMem2 ) + // for indexed memory access realRegisterData must not be the same register as the second memory register, + // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 + sint32 temp = realRegisterMem; + realRegisterMem = realRegisterMem2; + realRegisterMem2 = temp; + } + + bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; + bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian; + if( imlInstruction->op_storeLoad.copyWidth == 32 ) + { + //if( indexed ) + // PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + if (indexed) { - // for indexed memory access realRegisterData must not be the same register as the second memory register, - // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 - sint32 temp = realRegisterMem; - realRegisterMem = realRegisterMem2; - realRegisterMem2 = temp; + x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2); } - - bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; - bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian; - if( imlInstruction->op_storeLoad.copyWidth == 32 ) + if( IMLBackendX64_HasExtensionMOVBE() && switchEndian ) { - //if( indexed ) - // PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (indexed) { - x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, 
REG_RESV_TEMP, realRegisterMem, realRegisterMem2); - } - if( g_CPUFeatures.x86.movbe && switchEndian ) - { - if (indexed) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - //if (indexed && realRegisterMem != realRegisterData) - // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - } + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + //if (indexed && realRegisterMem != realRegisterData) + // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } else { - if (indexed) - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - //if (realRegisterMem != realRegisterData) - // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if (switchEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if (switchEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); } } - else if( imlInstruction->op_storeLoad.copyWidth == 16 ) + else { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // todo: We can avoid this if MOVBE is available if (indexed) { - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - if( g_CPUFeatures.x86.movbe && switchEndian ) - { - x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + //if (realRegisterMem != realRegisterData) + // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + if (switchEndian) + x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); } else { - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if( switchEndian ) - x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8); + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if (switchEndian) + x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); } - if( signExtend ) - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); - } - else if( imlInstruction->op_storeLoad.copyWidth == 8 ) - { - if( indexed ) - 
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // todo: Optimize by using only MOVZX/MOVSX - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // todo: Use sign extend move from memory instead of separate sign-extend? - if( signExtend ) - x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - else - x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + } + } + else if( imlInstruction->op_storeLoad.copyWidth == 16 ) + { + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // todo: We can avoid this if MOVBE is available + if (indexed) + { + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + if(IMLBackendX64_HasExtensionMOVBE() && switchEndian ) + { + x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER ) + else { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->op_storeLoad.immS32 != 0 ) - assert_dbg(); // not supported - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( switchEndian ) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation - // LWARX instruction costs extra cycles (this speeds up busy loops) - x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); + x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8); } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 ) - { + if( signExtend ) + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); + else + x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); + } + else if( imlInstruction->op_storeLoad.copyWidth == 8 ) + { + if( indexed ) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( switchEndian == false ) - assert_dbg(); - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move) - if( g_CPUFeatures.x86.movbe ) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, 
realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } - x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00); + // todo: Optimize by using only MOVZX/MOVSX + if( indexed ) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + // todo: Use sign extend move from memory instead of separate sign-extend? + if( signExtend ) + x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + else + x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if( indexed && realRegisterMem != realRegisterData ) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER ) + { + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + if( imlInstruction->op_storeLoad.immS32 != 0 ) + assert_dbg(); // not supported + if( indexed ) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if( indexed && realRegisterMem != realRegisterData ) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + if( switchEndian ) + x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation + // LWARX instruction costs extra cycles (this speeds up busy loops) + x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); + } + else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 ) + { + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + if( switchEndian == false ) + assert_dbg(); + if( indexed ) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move) + if(IMLBackendX64_HasExtensionMOVBE()) + { + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if( indexed && realRegisterMem != realRegisterData ) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } else - return false; - return true; + { + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if( indexed && realRegisterMem != realRegisterData ) + 
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); + } + x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00); } - return false; + else + return false; + return true; } /* @@ -510,169 +506,160 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed) realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); - if (false)//imlInstruction->op_storeLoad.flags & PPCREC_IML_OP_FLAG_FASTMEMACCESS ) + if (indexed && realRegisterMem == realRegisterMem2) { - // load u8/u16/u32 via direct memory access + optional sign extend - assert_dbg(); // todo + return false; } - else + if (indexed && realRegisterData == realRegisterMem2) { - if (indexed && realRegisterMem == realRegisterMem2) - { - return false; - } - if (indexed && realRegisterData == realRegisterMem2) - { - // for indexed memory access realRegisterData must not be the same register as the second memory register, - // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 - sint32 temp = realRegisterMem; - realRegisterMem = realRegisterMem2; - realRegisterMem2 = temp; - } + // for indexed memory access realRegisterData must not be the same register as the second memory register, + // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 + sint32 temp = realRegisterMem; + realRegisterMem = realRegisterMem2; + realRegisterMem2 = temp; + } - bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; - bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian; - if (imlInstruction->op_storeLoad.copyWidth == 32) + bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; + bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian; + if (imlInstruction->op_storeLoad.copyWidth == 32) + { + if (indexed) + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + uint32 valueRegister; + if ((swapEndian == false || IMLBackendX64_HasExtensionMOVBE()) && realRegisterMem != realRegisterData) { - if (indexed) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 valueRegister; - if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData) - { - valueRegister = realRegisterData; - } - else - { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - valueRegister = REG_RESV_TEMP; - } - if (g_CPUFeatures.x86.movbe == false && swapEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if (g_CPUFeatures.x86.movbe && swapEndian) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); - else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + valueRegister = realRegisterData; } - else if (imlInstruction->op_storeLoad.copyWidth == 16) + else { - if (indexed || swapEndian) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (swapEndian) - 
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // todo: Optimize this, e.g. by using MOVBE - } - else if (imlInstruction->op_storeLoad.copyWidth == 8) - { - if (indexed) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (indexed && realRegisterMem == realRegisterData) - { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - realRegisterData = REG_RESV_TEMP; - } - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { + valueRegister = REG_RESV_TEMP; + } + if (!IMLBackendX64_HasExtensionMOVBE() && swapEndian) + x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister); + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + if (IMLBackendX64_HasExtensionMOVBE() && swapEndian) + x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + else + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else if (imlInstruction->op_storeLoad.copyWidth == 16) + { + if (indexed || swapEndian) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (imlInstruction->op_storeLoad.immS32 != 0) - assert_dbg(); // todo - // reset cr0 LT, GT and EQ - sint32 crRegister = 0; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); - // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (swapEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // realRegisterMem now holds EA - x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); - sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - // EA matches reservation - // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); - // backup REG_RESV_MEMBASE - 
x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE); - // add mem register to REG_RESV_MEMBASE - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem); - // load reserved value in EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); - // bswap EAX - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_EAX); - - //x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, 0, REG_RESV_TEMP); - x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); - - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); - - // reset reservation - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0); - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0); - - // restore EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - // restore REG_RESV_MEMBASE - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2])); - - // copy XER SO to CR0 SO - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2) - { + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); + if (swapEndian) + x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + // todo: Optimize this, e.g. by using MOVBE + } + else if (imlInstruction->op_storeLoad.copyWidth == 8) + { + if (indexed) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes .. - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. 
as big-endian - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3) + if (indexed && realRegisterMem == realRegisterData) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + realRegisterData = REG_RESV_TEMP; + } + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData); + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) + { + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + if (imlInstruction->op_storeLoad.immS32 != 0) + assert_dbg(); // todo + // reset cr0 LT, GT and EQ + sint32 crRegister = 0; + x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); + // calculate effective address + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); + if (swapEndian) + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + // realRegisterMem now holds EA + x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); + sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex; + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); + // EA matches reservation + // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) + x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); + // backup REG_RESV_MEMBASE + x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE); + // add mem register to REG_RESV_MEMBASE + x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem); + // load reserved value in EAX + x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); + // bswap EAX + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_EAX); + + //x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, 0, REG_RESV_TEMP); + x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); + + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); 
+ + // reset reservation + x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0); + x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0); + + // restore EAX + x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + // restore REG_RESV_MEMBASE + x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2])); + + // copy XER SO to CR0 SO + x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); + // end + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); + } + else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2) + { + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); + x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes .. + x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. as big-endian + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP); + x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3) + { + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - return false; - return true; + x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); + x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP); + x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); + x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP); + x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); + x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP); + + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, 
realRegisterMem2); } - return false; + else + return false; + return true; } bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) @@ -781,7 +768,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // count leading zeros PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( g_CPUFeatures.x86.lzcnt ) + // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) + if(IMLBackendX64_HasExtensionLZCNT()) { x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); } @@ -1499,12 +1487,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) + if (IMLBackendX64_HasExtensionBMI2() && imlInstruction->operation == PPCREC_IML_OP_SRW) { // use BMI2 SHRX if available x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW) + else if (IMLBackendX64_HasExtensionBMI2() && imlInstruction->operation == PPCREC_IML_OP_SLW) { // use BMI2 SHLX if available x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); @@ -2656,4 +2644,79 @@ void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions() PPCRecompiler_leaveRecompilerCode_unvisited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited); -} \ No newline at end of file +} + +bool IMLBackendX64_HasExtensionLZCNT() +{ + return s_hasLZCNTSupport; +} + +bool IMLBackendX64_HasExtensionMOVBE() +{ + return s_hasMOVBESupport; +} + +bool IMLBackendX64_HasExtensionBMI2() +{ + return s_hasBMI2Support; +} + +bool IMLBackendX64_HasExtensionAVX() +{ + return s_hasAVXSupport; +} + +void IMLBackendX64_Init() +{ + // init x64 recompiler instance data + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL; + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL; + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL; + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL; + ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL; + ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL; + ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL); + ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL; + ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL); + ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL); + ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31); + ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF; + ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF; + 
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF; + ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL; + ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL; + ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0; + ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0; + ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0; + ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0; + ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f; + ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f; + ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f; + ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f; + *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000; + *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000; + + // mxcsr + ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000; + ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80; + + // query processor extensions + int cpuInfo[4]; + cpuid(cpuInfo, 0x80000001); + s_hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0; + cpuid(cpuInfo, 0x1); + s_hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0; + s_hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0; + cpuidex(cpuInfo, 0x7, 0); + s_hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0; + + forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", s_hasLZCNTSupport ? "LZCNT " : "", s_hasMOVBESupport ? "MOVBE " : "", s_hasAVXSupport ? 
"AVX " : ""); +} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h index 3df2b7619..ebfc55c9f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h @@ -131,6 +131,12 @@ enum #define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register +void IMLBackendX64_Init(); + +bool IMLBackendX64_HasExtensionLZCNT(); +bool IMLBackendX64_HasExtensionMOVBE(); +bool IMLBackendX64_HasExtensionBMI2(); +bool IMLBackendX64_HasExtensionAVX(); bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp index 618c51a28..b39d31c09 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp @@ -87,7 +87,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, { x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg); - if (g_CPUFeatures.x86.movbe) + if (IMLBackendX64_HasExtensionMOVBE()) { x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); } @@ -99,7 +99,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, } else { - if (g_CPUFeatures.x86.movbe) + if (IMLBackendX64_HasExtensionMOVBE()) { x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); } @@ -109,7 +109,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); } } - if (g_CPUFeatures.x86.avx) + if (IMLBackendX64_HasExtensionAVX()) { x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); } @@ -281,29 +281,21 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio { x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); - if( g_CPUFeatures.x86.movbe ) + if(IMLBackendX64_HasExtensionMOVBE()) x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); else x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); } else { - if( g_CPUFeatures.x86.movbe ) + if(IMLBackendX64_HasExtensionMOVBE()) x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); else x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); } - if( g_CPUFeatures.x86.movbe == false ) + if(IMLBackendX64_HasExtensionMOVBE() == false ) x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if( g_CPUFeatures.x86.avx ) - { - x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP); - } - else - { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP); - x64Gen_movddup_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RSP, 
offsetof(PPCInterpreter_t, temporaryFPR)); - } + x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP); if (imlInstruction->op_storeLoad.flags2.notExpanded) { @@ -317,7 +309,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio } else if( mode == PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0 ) { - if( g_CPUFeatures.x86.avx ) + if( IMLBackendX64_HasExtensionAVX() ) { if( indexed ) { @@ -420,23 +412,15 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0) { x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - if (g_CPUFeatures.x86.avx) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - if (g_CPUFeatures.x86.movbe == false) + x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); + if (IMLBackendX64_HasExtensionMOVBE() == false) x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); if (indexed) { cemu_assert_debug(memReg != memRegEx); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); } - if (g_CPUFeatures.x86.movbe) + if (IMLBackendX64_HasExtensionMOVBE()) x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); else x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); @@ -605,30 +589,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti if (imlInstruction->op_storeLoad.flags2.notExpanded) { // value is already in single format - if (g_CPUFeatures.x86.avx) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } + x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); } else { x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM); - if (g_CPUFeatures.x86.avx) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } + x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); } - if( g_CPUFeatures.x86.movbe == false ) + if(IMLBackendX64_HasExtensionMOVBE() == false ) x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); if( indexed ) { @@ -636,7 +604,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti assert_dbg(); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - if( g_CPUFeatures.x86.movbe ) + if(IMLBackendX64_HasExtensionMOVBE()) x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); else x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); @@ 
-669,15 +637,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti } else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 ) { - if( g_CPUFeatures.x86.avx ) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } + x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); if( indexed ) { @@ -1057,7 +1017,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti { x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); } - else if (g_CPUFeatures.x86.avx) + else if (IMLBackendX64_HasExtensionAVX()) { x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA, imlInstruction->op_fpr_r_r_r.registerOperandB); } From db60ea65354d7b7864d09ce427d5af543cbc9bcd Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 7 Nov 2022 04:03:05 +0100 Subject: [PATCH 11/64] PPCRec: Move X64 files into subdirectory and rename --- src/Cafe/CMakeLists.txt | 16 ++++++++-------- .../Espresso/Interpreter/PPCInterpreterOPC.cpp | 1 - .../BackendX64.cpp} | 6 +++--- .../BackendX64.h} | 4 +++- .../BackendX64AVX.cpp} | 10 ++++------ .../BackendX64BMI.cpp} | 3 +-- .../BackendX64FPU.cpp} | 12 ++++++------ .../BackendX64Gen.cpp} | 4 +--- .../BackendX64GenFPU.cpp} | 4 +--- .../{x64Emit.hpp => BackendX64/X64Emit.hpp} | 0 .../HW/Espresso/Recompiler/IML/IMLOptimizer.cpp | 2 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 3 ++- .../IML/IMLRegisterAllocatorRanges.cpp | 1 - .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 4 ++-- 14 files changed, 32 insertions(+), 38 deletions(-) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerX64.cpp => BackendX64/BackendX64.cpp} (99%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerX64.h => BackendX64/BackendX64.h} (99%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerX64AVX.cpp => BackendX64/BackendX64AVX.cpp} (92%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerX64BMI.cpp => BackendX64/BackendX64BMI.cpp} (98%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerX64FPU.cpp => BackendX64/BackendX64FPU.cpp} (99%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerX64Gen.cpp => BackendX64/BackendX64Gen.cpp} (99%) rename src/Cafe/HW/Espresso/Recompiler/{PPCRecompilerX64GenFPU.cpp => BackendX64/BackendX64GenFPU.cpp} (99%) rename src/Cafe/HW/Espresso/Recompiler/{x64Emit.hpp => BackendX64/X64Emit.hpp} (100%) diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 8c2dd5f4f..0ced95c54 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -83,14 +83,14 @@ add_library(CemuCafe HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp HW/Espresso/Recompiler/PPCRecompilerIml.h HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp - HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp - HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp - HW/Espresso/Recompiler/PPCRecompilerX64.cpp - HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp - HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp - HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp - HW/Espresso/Recompiler/PPCRecompilerX64.h - 
HW/Espresso/Recompiler/x64Emit.hpp + HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64.h + HW/Espresso/Recompiler/BackendX64/X64Emit.hpp HW/Latte/Common/RegisterSerializer.cpp HW/Latte/Common/RegisterSerializer.h HW/Latte/Common/ShaderSerializer.cpp diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp index 12f86427b..d6b643eed 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp @@ -5,7 +5,6 @@ #include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h" #include "../Recompiler/PPCRecompiler.h" -#include "../Recompiler/PPCRecompilerX64.h" #include #include "Cafe/HW/Latte/Core/LatteBufferCache.h" diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index c9b913ab3..f5b249672 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -1,9 +1,9 @@ #include "Cafe/HW/Espresso/PPCState.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" +#include "BackendX64.h" #include "Cafe/OS/libs/coreinit/coreinit_Time.h" #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index ebfc55c9f..60cc1e2af 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -1,4 +1,6 @@ +#include "../PPCRecompiler.h" // todo - get rid of dependency + struct x64RelocEntry_t { uint32 offset; @@ -138,7 +140,7 @@ bool IMLBackendX64_HasExtensionMOVBE(); bool IMLBackendX64_HasExtensionBMI2(); bool IMLBackendX64_HasExtensionAVX(); -bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); +bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp similarity index 92% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp index 619c3985b..b0ef8640e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp @@ -1,5 +1,4 @@ -#include "PPCRecompiler.h" -#include 
"PPCRecompilerX64.h" +#include "BackendX64.h" void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32); @@ -21,11 +20,10 @@ void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 a x64Gen_writeU8(x64GenContext, opcode); } -#define VEX_PP_0F 0 // guessed +#define VEX_PP_0F 0 #define VEX_PP_66_0F 1 -#define VEX_PP_F3_0F 2 // guessed -#define VEX_PP_F2_0F 3 // guessed - +#define VEX_PP_F3_0F 2 +#define VEX_PP_F2_0F 3 void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp similarity index 98% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp index 5a71e93d9..c9ffc4649 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp @@ -1,5 +1,4 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerX64.h" +#include "BackendX64.h" void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index b39d31c09..e01b0ac5b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -1,8 +1,8 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "asm/x64util.h" -#include "Common/cpu_features.h" +#include "../PPCRecompiler.h" +#include "../IML/IML.h" +#include "BackendX64.h" + +#include "asm/x64util.h" // for recompiler_fres / frsqrte void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { @@ -710,7 +710,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction // unpack top to bottom and top x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } - //else if ( g_CPUFeatures.x86.avx ) + //else if ( hasAVXSupport ) //{ // // unpack top to bottom and top with non-destructive destination // // update: On Ivy Bridge this causes weird stalls? 
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 19327f465..3abecb753 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -1,6 +1,4 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" +#include "BackendX64.h" // x86/x64 extension opcodes that could be useful: // ANDN diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp index 92289d68b..06f79b9c3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp @@ -1,6 +1,4 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" +#include "BackendX64.h" void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode) { diff --git a/src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp similarity index 100% rename from src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 88463447f..cd0f07c07 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -4,7 +4,7 @@ #include "../PPCRecompiler.h" #include "../PPCRecompilerIml.h" -#include "../PPCRecompilerX64.h" +#include "../BackendX64/BackendX64.h" struct replacedRegisterTracker_t { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index a75f634fd..08d776e72 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -2,9 +2,10 @@ #include "../PPCRecompiler.h" #include "../PPCRecompilerIml.h" -#include "../PPCRecompilerX64.h" #include "IMLRegisterAllocatorRanges.h" +#include "../BackendX64/BackendX64.h" + uint32 recRACurrentIterationIndex = 0; uint32 PPCRecRA_getNextIterationIndex() diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 64a0966e0..14159c772 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -1,6 +1,5 @@ #include "../PPCRecompiler.h" #include "../PPCRecompilerIml.h" -#include "../PPCRecompilerX64.h" #include "IMLRegisterAllocatorRanges.h" #include "util/helpers/MemoryPool.h" diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 6c3cbde3b..9ff113b14 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -2,7 +2,6 @@ #include "PPCFunctionBoundaryTracker.h" #include "PPCRecompiler.h" #include 
"PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" #include "Cafe/OS/RPL/rpl.h" #include "util/containers/RangeStore.h" #include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h" @@ -14,7 +13,8 @@ #include "util/helpers/helpers.h" #include "util/MemMapper/MemMapper.h" -#include "Cafe/HW/Espresso/Recompiler/IML/IML.h" +#include "IML/IML.h" +#include "BackendX64/BackendX64.h" struct PPCInvalidationRange { From ce8dc5526c79699596a106d6b14bb52678dff18e Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 12 Dec 2022 08:50:29 +0100 Subject: [PATCH 12/64] PPCRec: Reworked IML builder to work with basic-blocks Intermediate commit while I'm still fixing things but I didn't want to pile on too many changes in a single commit. New: Reworked PPC->IML converter to first create a graph of basic blocks and then turn those into IML segment(s). This was mainly done to decouple IML design from having PPC specific knowledge like branch target addresses. The previous design also didn't allow to preserve cycle counting properly in all cases since it was based on IML instruction counting. The new solution supports functions with non-continuous body. A pretty common example for this is when functions end with a trailing B instruction to some other place. Current limitations: - BL inlining not implemented - MFTB not implemented - BCCTR and BCLR are only partially implemented Undo vcpkg change --- src/Cafe/HW/Espresso/EspressoISA.h | 20 +- .../Recompiler/BackendX64/BackendX64.cpp | 137 +- .../Recompiler/BackendX64/BackendX64.h | 19 +- .../Recompiler/BackendX64/BackendX64Gen.cpp | 5 - .../Recompiler/BackendX64/X64Emit.hpp | 1 - src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 2 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 62 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 8 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 109 +- .../HW/Espresso/Recompiler/IML/IMLSegment.cpp | 29 + .../HW/Espresso/Recompiler/IML/IMLSegment.h | 28 +- .../Recompiler/PPCFunctionBoundaryTracker.h | 45 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 21 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 28 + .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 7 +- .../Recompiler/PPCRecompilerImlGen.cpp | 1534 ++++++++++++----- .../Recompiler/PPCRecompilerIntermediate.cpp | 106 +- src/Cemu/Logging/CemuLogging.h | 2 +- 18 files changed, 1514 insertions(+), 649 deletions(-) diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h index b3ae45c30..e66e1424e 100644 --- a/src/Cafe/HW/Espresso/EspressoISA.h +++ b/src/Cafe/HW/Espresso/EspressoISA.h @@ -91,13 +91,15 @@ namespace Espresso BCCTR = 528 }; - enum class OPCODE_31 + enum class Opcode31 { - + TW = 4, + MFTB = 371, }; inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); }; inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); }; + inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); }; struct BOField { @@ -132,6 +134,12 @@ namespace Espresso uint8 bo; }; + // returns true if LK bit is set, only valid for branch instructions + inline bool DecodeLK(uint32 opcode) + { + return (opcode & 1) != 0; + } + inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK) { LI = opcode & 0x3fffffc; @@ -183,13 +191,7 @@ namespace Espresso _decodeForm_D_branch(opcode, BD, BO, BI, AA, LK); } - inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) - { - // form XL (with BD field expected to be zero) - 
_decodeForm_XL(opcode, BO, BI, LK); - } - - inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) + inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCCTR { // form XL (with BD field expected to be zero) _decodeForm_XL(opcode, BO, BI, LK); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index f5b249672..1267be79a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -24,15 +24,7 @@ sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping */ void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr) { - if( x64GenContext->relocateOffsetTableCount >= x64GenContext->relocateOffsetTableSize ) - { - x64GenContext->relocateOffsetTableSize = std::max(4, x64GenContext->relocateOffsetTableSize*2); - x64GenContext->relocateOffsetTable = (x64RelocEntry_t*)realloc(x64GenContext->relocateOffsetTable, sizeof(x64RelocEntry_t)*x64GenContext->relocateOffsetTableSize); - } - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].offset = x64GenContext->codeBufferIndex; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].type = type; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].extraInfo = extraInfo; - x64GenContext->relocateOffsetTableCount++; + x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, type, extraInfo); } /* @@ -306,6 +298,9 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) { + // according to MS ABI the caller needs to save: + // RAX, RCX, RDX, R8, R9, R10, R11 + uint32 ppcAddress = imlInstruction->op_macro.param; uint32 sprId = imlInstruction->op_macro.param2&0xFFFF; uint32 gprIndex = (imlInstruction->op_macro.param2>>16)&0x1F; @@ -321,7 +316,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, // reserve space on stack for call parameters x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11 + 8); x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0); - // call HLE function + // call function if( sprId == SPR_TBL ) x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBL); else if( sprId == SPR_TBU ) x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBU); @@ -1971,6 +1966,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { + if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) + { + debug_printf("PPCRecompilerX64Gen_imlInstruction_conditionalJump(): Failed on deprecated jump method\n"); + return false; + } + if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) { // jump always @@ -1985,19 +1986,25 @@ else { // deprecated (jump to jumpmark) + __debugbreak(); // deprecated PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); x64Gen_jmp_imm32(x64GenContext, 0); } } else { - if (imlInstruction->op_conditionalJump.jumpAccordingToSegment) -
assert_dbg(); + if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) + { + debug_printf("Unsupported deprecated cjump to ppc address\n"); + return false; + } + cemu_assert_debug(imlSegment->nextSegmentBranchTaken); + // generate jump update marker if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) { // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); sint32 condition = imlInstruction->op_conditionalJump.condition; if( condition == PPCREC_JUMP_CONDITION_E ) x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); @@ -2015,19 +2022,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); return true; } @@ -2036,19 +2043,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } @@ -2057,26 +2064,28 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } } + cemu_assert_debug(false); // should not reach? 
} x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + cemu_assert_debug(imlSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, (void*)imlSegment->GetBranchTaken()); if( imlInstruction->op_conditionalJump.bitMustBeSet ) { x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); @@ -2094,13 +2103,14 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // some tests (all performed on a i7-4790K) - // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write) + // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency) // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC // BT x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); + cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, x64GenContext->currentSegment->GetBranchTaken()); + x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); return true; } @@ -2152,22 +2162,6 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { imlInstruction->op_ppcEnter.x64Offset = x64GenContext->codeBufferIndex; - // generate code - if( ppcImlGenContext->hasFPUInstruction ) - { - // old FPU unavailable code - //PPCRecompilerX86_crConditionFlags_saveBeforeOverwrite(PPCRecFunction, ppcImlGenContext, x64GenContext); - //// skip if FP bit in MSR is set - //// #define MSR_FP (1<<13) - //x64Gen_bt_mem8(x64GenContext, REG_ESP, offsetof(PPCInterpreter_t, msr), 13); - //uint32 jmpCodeOffset = x64GenContext->codeBufferIndex; - //x64Gen_jmpc(x64GenContext, X86_CONDITION_CARRY, 0); - //x64Gen_mov_reg32_imm32(x64GenContext, REG_EAX, imlInstruction->op_ppcEnter.ppcAddress&0x7FFFFFFF); - //PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X86_RELOC_MAKE_RELATIVE); - //x64Gen_jmp_imm32(x64GenContext, (uint32)PPCRecompiler_recompilerCallEscapeAndCallFPUUnavailable); - //// patch jump - //*(uint32*)(x64GenContext->codeBuffer+jmpCodeOffset+2) = x64GenContext->codeBufferIndex-jmpCodeOffset-6; - } } void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) @@ -2193,7 +2187,6 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, } else assert_dbg(); - //x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr)+sizeof(uint32)*(name-PPCREC_NAME_SPR0)); } else assert_dbg(); @@ -2256,7 +2249,7 @@ uint8* 
PPCRecompilerX86_allocateExecutableMemory(sint32 size) bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 1024; x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; @@ -2266,6 +2259,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo bool codeGenerationFailed = false; for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { + x64GenContext.currentSegment = segIt; segIt->x64Offset = x64GenContext.codeBufferIndex; for(size_t i=0; iimlList.size(); i++) { @@ -2442,48 +2436,43 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo if( codeGenerationFailed ) { free(x64GenContext.codeBuffer); - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); return false; } // allocate executable memory uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); size_t baseAddress = (size_t)executableMemory; // fix relocs - for(sint32 i=0; isegmentList2) - { - if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) - { - x64Offset = segIt->x64Offset; - break; - } - } - if (x64Offset == 0xFFFFFFFF) - { - debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); - // todo: Cleanup - return false; - } + if (relocIt.type == X64_RELOC_LINK_TO_PPC) + { + cemu_assert_suspicious(); + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) + // { + // x64Offset = segIt->x64Offset; + // break; + // } + //} + //if (x64Offset == 0xFFFFFFFF) + //{ + // debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); + // // todo: Cleanup + // return false; + //} } else { - IMLSegment* destSegment = (IMLSegment*)x64GenContext.relocateOffsetTable[i].extraInfo; + IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; x64Offset = destSegment->x64Offset; } - uint32 relocBase = x64GenContext.relocateOffsetTable[i].offset; + uint32 relocBase = relocIt.offset; uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) { @@ -2525,8 +2514,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); free(x64GenContext.codeBuffer); x64GenContext.codeBuffer = nullptr; - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); // set code PPCRecFunction->x86Code = executableMemory; PPCRecFunction->x86Size = x64GenContext.codeBufferIndex; @@ -2535,7 +2522,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo void PPCRecompilerX64Gen_generateEnterRecompilerCode() { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 1024; x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; @@ -2615,7 +2602,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 128; 
x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 60cc1e2af..0548f402d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -3,6 +3,8 @@ struct x64RelocEntry_t { + x64RelocEntry_t(uint32 offset, uint8 type, void* extraInfo) : offset(offset), type(type), extraInfo(extraInfo) {}; + uint32 offset; uint8 type; void* extraInfo; @@ -10,16 +12,16 @@ struct x64RelocEntry_t struct x64GenContext_t { - uint8* codeBuffer; - sint32 codeBufferIndex; - sint32 codeBufferSize; + IMLSegment* currentSegment{}; + + uint8* codeBuffer{}; + sint32 codeBufferIndex{}; + sint32 codeBufferSize{}; // cr state - sint32 activeCRRegister; // current x86 condition flags reflect this cr* register - sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned) + sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register + sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned) // relocate offsets - x64RelocEntry_t* relocateOffsetTable; - sint32 relocateOffsetTableSize; - sint32 relocateOffsetTableCount; + std::vector relocateOffsetTable2; }; // Some of these are defined by winnt.h and gnu headers @@ -126,7 +128,6 @@ enum #define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) #define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) -#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction #define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset #define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 3abecb753..33ff52ac1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -5,11 +5,6 @@ // mulx, rorx, sarx, shlx, shrx // PDEP, PEXT -void x64Gen_checkBuffer(x64GenContext_t* x64GenContext) -{ - // todo -} - void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v) { if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp index e936f1d85..b40219311 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp @@ -203,7 +203,6 @@ template void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB) { static_assert(TA::getType() == MODRM_OPR_TYPE::REG); - x64Gen_checkBuffer(x64GenContext); // REX prefix // 0100 WRXB if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 06f398156..72a2d3f5d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -28,5 +28,5 @@ void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext); // debug -void IMLDebug_DumpSegment(struct IMLSegment* 
imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); +void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 69d8e1b7d..560f5de1f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -104,31 +104,48 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml } } -void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) +std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg) +{ + if (!ctx) + { + return ""; + } + // find segment index + for (size_t i = 0; i < ctx->segmentList2.size(); i++) + { + if (ctx->segmentList2[i] == seg) + { + return fmt::format("Seg{:04x}", i); + } + } + return ""; +} + +void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); - strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, imlSegment->ppcAddrMin, imlSegment->ppcAddrMax, imlSegment->loopDepth); + strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); if (imlSegment->isEnterable) { strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); } - else if (imlSegment->isJumpDestination) - { - strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); - } + //else if (imlSegment->isJumpDestination) + //{ + // strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); + //} debug_printf("%s\n", strOutput.c_str()); - strOutput.reset(); - strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); - debug_printf("%s", strOutput.c_str()); + //strOutput.reset(); + //strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); + //debug_printf("%s", strOutput.c_str()); if (printLivenessRangeInfo) { IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); } - debug_printf("\n"); + //debug_printf("\n"); sint32 lineOffsetParameters = 18; @@ -376,22 +393,22 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); } else if (inst.type ==
PPCREC_IML_TYPE_FPR_R_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress); + strOutput.addFmt("CYCLE_CHECK\n"); } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { @@ -451,14 +468,15 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin { if (i) debug_printf(", "); - debug_printf("%p", (void*)imlSegment->list_prevSegments[i]); + debug_printf("%s", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str()); } debug_printf("\n"); - debug_printf("Links to: "); if (imlSegment->nextSegmentBranchNotTaken) - debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken); + debug_printf("BranchNotTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str()); if (imlSegment->nextSegmentBranchTaken) - debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken); + debug_printf("BranchTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str()); + if (imlSegment->nextSegmentIsUncertain) + debug_printf("Dynamic target\n"); debug_printf("\n"); } @@ -466,7 +484,7 @@ void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext) { for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { - IMLDebug_DumpSegment(ppcImlGenContext->segmentList2[i], i); + IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], false); debug_printf("\n"); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 18cf580d3..c86bb6103 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -152,7 +152,7 @@ enum PPCREC_IML_TYPE_R_S32, // r* (op) imm PPCREC_IML_TYPE_MACRO, PPCREC_IML_TYPE_CJUMP, // conditional jump - PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0 + PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) // conditional @@ -420,6 +420,11 @@ struct IMLInstruction op_jumpmark.address = address; } + void make_debugbreak(uint32 currentPPCAddress = 0) + { + make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0); + } + void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) { type = PPCREC_IML_TYPE_MACRO; @@ -431,6 +436,7 @@ struct IMLInstruction void make_ppcEnter(uint32 ppcAddress) { + cemu_assert_suspicious(); // removed type = PPCREC_IML_TYPE_PPC_ENTER; operation = 0; op_ppcEnter.ppcAddress = ppcAddress; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 08d776e72..b90aa9b18 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -74,44 +74,44 
@@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml } } -typedef struct -{ - sint32 name; - sint32 virtualRegister; - sint32 physicalRegister; - bool isDirty; -}raRegisterState_t; - -const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; - -raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].virtualRegister == virtualRegister) - { -#ifdef CEMU_DEBUG_ASSERT - if (regState[i].physicalRegister < 0) - assert_dbg(); -#endif - return regState + i; - } - } - return nullptr; -} - -raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].physicalRegister < 0) - { - regState[i].physicalRegister = i; - return regState + i; - } - } - return nullptr; -} +//typedef struct +//{ +// sint32 name; +// sint32 virtualRegister; +// sint32 physicalRegister; +// bool isDirty; +//}raRegisterState_t; + +//const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; +// +//raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) +//{ +// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) +// { +// if (regState[i].virtualRegister == virtualRegister) +// { +//#ifdef CEMU_DEBUG_ASSERT +// if (regState[i].physicalRegister < 0) +// assert_dbg(); +//#endif +// return regState + i; +// } +// } +// return nullptr; +//} +// +//raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) +//{ +// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) +// { +// if (regState[i].physicalRegister < 0) +// { +// regState[i].physicalRegister = i; +// return regState + i; +// } +// } +// return nullptr; +//} typedef struct { @@ -309,18 +309,32 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) #endif } -bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { + // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) + // algorithm goes as follows: + // 1) Iterate all instructions from beginning to end and keep a list of covering ranges + // 2) If we encounter an instruction with a fixed-register we: + // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction + // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. RA will then schedule register allocation around that and avoid moves + // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction + // Q: What if a specific fixed-register is used both for input and output and thus is destructive? A: Create temporary range + // Q: What if we have 3 different inputs that are all the same virtual register? A: Create temporary range + // Q: Assuming the above is implemented, do we even support overlapping two ranges of separate virtual regs on the same phys register? 
In theory the RA shouldn't care + // assume imlSegment->raInfo.linkedList_allSubranges is ordered ascending by start index already + + // todo +} +bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +{ // sort subranges ascending by start index - - //std::sort(imlSegment->raInfo.list_subranges.begin(), imlSegment->raInfo.list_subranges.end(), _sortSubrangesByStartIndexDepr); _sortSegmentAllSubrangesLinkedList(imlSegment); + PPCRecRA_HandleFixedRegisters(ppcImlGenContext, imlSegment); + raLiveRangeInfo_t liveInfo; liveInfo.liveRangesCount = 0; - //sint32 subrangeIndex = 0; - //for (auto& subrange : imlSegment->raInfo.list_subranges) raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { @@ -365,7 +379,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; continue; } - // find free register + // find free register for this segment uint32 physRegisterMask = (1<range); physRegisterMask &= allowedPhysRegisterMask; } @@ -761,7 +776,6 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - //for (auto& subrange : imlSegment->raInfo.list_subranges) while(subrangeItr) { if (subrangeItr->start.index == RA_INTER_RANGE_START) @@ -933,7 +947,7 @@ void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) +void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) { // insert empty segments after every non-taken branch if the linked segment has more than one input // this gives the register allocator more room to create efficient spill code @@ -985,7 +999,7 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext) { - PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext); + PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext); ppcImlGenContext->raInfo.list_ranges = std::vector(); @@ -1243,7 +1257,6 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe if (remainingScanDist <= 0) return; // can't reach end - // also dont forget: Extending is easier if we allow 'non symmetric' branches. E.g. 
register range one enters one branch IMLSegment* route[64]; route[0] = currentSegment; if (currentSegment->nextSegmentBranchNotTaken) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp index 4882a0a15..2b2c56a21 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ -1,6 +1,13 @@ #include "IMLInstruction.h" #include "IMLSegment.h" +void IMLSegment::SetEnterable(uint32 enterAddress) +{ + cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress); + isEnterable = true; + enterPPCAddress = enterAddress; +} + bool IMLSegment::HasSuffixInstruction() const { if (imlList.empty()) @@ -16,8 +23,30 @@ IMLInstruction* IMLSegment::GetLastInstruction() return &imlList.back(); } +void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchNotTaken) + nextSegmentBranchNotTaken->list_prevSegments.erase(std::find(nextSegmentBranchNotTaken->list_prevSegments.begin(), nextSegmentBranchNotTaken->list_prevSegments.end(), this)); + nextSegmentBranchNotTaken = imlSegmentDst; + if(imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} +void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchTaken) + nextSegmentBranchTaken->list_prevSegments.erase(std::find(nextSegmentBranchTaken->list_prevSegments.begin(), nextSegmentBranchTaken->list_prevSegments.end(), this)); + nextSegmentBranchTaken = imlSegmentDst; + if (imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} +IMLInstruction* IMLSegment::AppendInstruction() +{ + IMLInstruction& inst = imlList.emplace_back(); + memset(&inst, 0, sizeof(IMLInstruction)); + return &inst; +} void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 1e27d303a..7ea7903bc 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -103,8 +103,8 @@ struct IMLSegment bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true // jump destination segments - bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps - uint32 jumpDestinationPPCAddress{}; + //bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps + //uint32 jumpDestinationPPCAddress{}; // PPC FPR use mask bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR // CR use mask @@ -115,10 +115,30 @@ struct IMLSegment PPCSegmentRegisterAllocatorInfo_t raInfo{}; PPCRecVGPRDistances_t raDistances{}; bool raRangeExtendProcessed{}; - // segment points - IMLSegmentPoint* segmentPointList{}; + + // segment state API + void SetEnterable(uint32 enterAddress); + void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst); + void SetLinkBranchTaken(IMLSegment* imlSegmentDst); + + IMLSegment* GetBranchTaken() + { + return nextSegmentBranchTaken; + } + + IMLSegment* GetBranchNotTaken() + { + return nextSegmentBranchNotTaken; + } + + // instruction API + IMLInstruction* AppendInstruction(); + bool HasSuffixInstruction() const; IMLInstruction* GetLastInstruction(); + + // segment points + IMLSegmentPoint* segmentPointList{}; }; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h 
b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h index e558292bb..96b5143e9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h @@ -21,6 +21,16 @@ class PPCFunctionBoundaryTracker }; public: + ~PPCFunctionBoundaryTracker() + { + while (!map_ranges.empty()) + { + PPCRange_t* range = *map_ranges.begin(); + delete range; + map_ranges.erase(map_ranges.begin()); + } + } + void trackStartPoint(MPTR startAddress) { processRange(startAddress, nullptr, nullptr); @@ -40,10 +50,34 @@ class PPCFunctionBoundaryTracker return false; } + std::vector GetRanges() + { + std::vector r; + for (auto& it : map_ranges) + r.emplace_back(*it); + return r; + } + + bool ContainsAddress(uint32 addr) const + { + for (auto& it : map_ranges) + { + if (addr >= it->startAddress && addr < it->getEndAddress()) + return true; + } + return false; + } + + const std::set& GetBranchTargets() const + { + return map_branchTargetsAll; + } + private: void addBranchDestination(PPCRange_t* sourceRange, MPTR address) { - map_branchTargets.emplace(address); + map_queuedBranchTargets.emplace(address); + map_branchTargetsAll.emplace(address); } // process flow of instruction @@ -114,7 +148,7 @@ class PPCFunctionBoundaryTracker Espresso::BOField BO; uint32 BI; bool LK; - Espresso::decodeOp_BCLR(opcode, BO, BI, LK); + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); if (BO.branchAlways() && !LK) { // unconditional BLR @@ -218,7 +252,7 @@ class PPCFunctionBoundaryTracker auto rangeItr = map_ranges.begin(); PPCRange_t* previousRange = nullptr; - for (std::set::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); ) + for (std::set::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); ) { while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr)) { @@ -239,7 +273,7 @@ class PPCFunctionBoundaryTracker (*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length)) { // delete visited targets - targetItr = map_branchTargets.erase(targetItr); + targetItr = map_queuedBranchTargets.erase(targetItr); continue; } @@ -289,5 +323,6 @@ class PPCFunctionBoundaryTracker }; std::set map_ranges; - std::set map_branchTargets; + std::set map_queuedBranchTargets; + std::set map_branchTargetsAll; }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 9ff113b14..8ec2f545b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -131,7 +131,7 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress) } bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext); -PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut) +PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut, PPCFunctionBoundaryTracker& boundaryTracker) { if (range.startAddress >= PPC_REC_CODE_AREA_END) { @@ -156,10 +156,10 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t(); ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; - + // generate intermediate code 
ppcImlGenContext_t ppcImlGenContext = { 0 }; - bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses); + bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker); if (compiledSuccessfully == false) { delete ppcRecFunc; @@ -173,6 +173,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + //if (ppcRecFunc->ppcAddress == 0x12345678) + //{ + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + + // Large functions for testing (botw): + // 3B4049C + // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); if (x64GenerationSuccess == false) @@ -181,6 +191,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP } // collect list of PPC-->x64 entry points + cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size()); + cemu_assert_debug(ppcImlGenContext.imlListCount == 0); + entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) { @@ -359,7 +372,7 @@ void PPCRecompiler_recompileAtAddress(uint32 address) PPCRecompilerState.recompilerSpinlock.unlock(); std::vector> functionEntryPoints; - auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints); + auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints, funcBoundaries); if (!func) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index e943d8d37..10cd0aa02 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -31,9 +31,12 @@ struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct pp struct ppcImlGenContext_t { + class PPCFunctionBoundaryTracker* boundaryTracker; PPCRecFunction_t* functionRef; uint32* currentInstruction; uint32 ppcAddressOfCurrentInstruction; + IMLSegment* currentOutputSegment; + struct PPCBasicBlockInfo* currentBasicBlock{}; // fpr mode bool LSQE{ true }; bool PSE{ true }; @@ -82,6 +85,31 @@ struct ppcImlGenContext_t { return *PPCRecompilerImlGen_generateNewEmptyInstruction(this); } + + IMLSegment* NewSegment() + { + IMLSegment* seg = new IMLSegment(); + segmentList2.emplace_back(seg); + return seg; + } + + size_t GetSegmentIndex(IMLSegment* seg) + { + for (size_t i = 0; i < segmentList2.size(); i++) + { + if (segmentList2[i] == seg) + return i; + } + cemu_assert_error(); + return 0; + } + + IMLSegment* InsertSegment(size_t index) + { + IMLSegment* newSeg = new IMLSegment(); + segmentList2.insert(segmentList2.begin() + index, 1, newSeg); + return newSeg; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 3b8783f50..0521c440b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -1,7 +1,12 @@ #define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example) -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& 
ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses); +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker); + +IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); + +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext); IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index a1cb6f2e2..6d488b174 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1,28 +1,74 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" +#include "Cafe/HW/Espresso/EspressoISA.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "IML/IML.h" #include "IML/IMLRegisterAllocatorRanges.h" +#include "PPCFunctionBoundaryTracker.h" + +struct PPCBasicBlockInfo +{ + PPCBasicBlockInfo(uint32 startAddress, const std::set& entryAddresses) : startAddress(startAddress), lastAddress(startAddress) + { + isEnterable = entryAddresses.find(startAddress) != entryAddresses.end(); + } + + uint32 startAddress; + uint32 lastAddress; // inclusive + bool isEnterable{ false }; + //uint32 enterableAddress{}; -> covered by startAddress + bool hasContinuedFlow{ true }; // non-branch path goes to next segment (lastAddress+4), assumed by default + bool hasBranchTarget{ false }; + uint32 branchTarget{}; + + // associated IML segments + IMLSegment* firstSegment{}; // first segment in chain, used as branch target for other segments + IMLSegment* appendSegment{}; // last segment in chain, new instructions should be appended to this segment + + void SetInitialSegment(IMLSegment* seg) + { + cemu_assert_debug(!firstSegment); + cemu_assert_debug(!appendSegment); + firstSegment = seg; + appendSegment = seg; + } + + IMLSegment* GetFirstSegmentInChain() + { + return firstSegment; + } + + IMLSegment* GetSegmentForInstructionAppend() + { + return appendSegment; + } +}; bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset); IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) { - if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) - { - sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; - ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); - ppcImlGenContext->imlListSize = newSize; - } - IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; - memset(imlInstruction, 0x00, sizeof(IMLInstruction)); - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default - imlInstruction->associatedPPCAddress = 
ppcImlGenContext->ppcAddressOfCurrentInstruction; - ppcImlGenContext->imlListCount++; - return imlInstruction; + //if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) + //{ + // sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; + // ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); + // ppcImlGenContext->imlListSize = newSize; + //} + //IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; + //memset(imlInstruction, 0x00, sizeof(IMLInstruction)); + //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default + //imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; + //ppcImlGenContext->imlListCount++; + //return imlInstruction; + + IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); + memset(&inst, 0x00, sizeof(IMLInstruction)); + inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default +//imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; + + return &inst; } void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) @@ -109,6 +155,8 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress) { + __debugbreak(); + // jump if (imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -168,10 +216,13 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { + __debugbreak(); + // conditional jump IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; imlInstruction->op_conditionalJump.condition = jumpCondition; imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; @@ -179,6 +230,19 @@ void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; } +void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) +{ + // conditional jump + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + imlInstruction->type = PPCREC_IML_TYPE_CJUMP; + imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; + imlInstruction->op_conditionalJump.condition = jumpCondition; + imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; + imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; + imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; +} + void 
PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { // load from memory @@ -363,7 +427,13 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // split before and after to make sure the macro is in an isolated segment that we can make enterable + PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + ppcImlGenContext->currentOutputSegment->SetEnterable(ppcImlGenContext->ppcAddressOfCurrentInstruction); PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); + IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + middleSeg->SetLinkBranchTaken(nullptr); + middleSeg->SetLinkBranchNotTaken(nullptr); } bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -417,6 +487,9 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PPCRecompilerImlGen_MFTB(): Not supported\n"); + return false; + uint32 rD, spr1, spr2, spr; PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); @@ -426,6 +499,8 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // TBL / TBU uint32 param2 = spr | (rD << 16); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0); + IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + return true; } return false; @@ -560,7 +635,7 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin ppcImlGenContext->cyclesSinceLastBranch++; if (PPCRecompiler_decodePPCInstruction(ppcImlGenContext)) { - assert_dbg(); + cemu_assert_suspicious(); } } // add range @@ -582,33 +657,17 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { // function call - // check if function can be inlined - sint32 inlineFuncInstructionCount = 0; - if (PPCRecompiler_canInlineFunction(jumpAddressDest, &inlineFuncInstructionCount)) - { - // generate NOP iml instead of BL macro (this assures that segment PPC range remains intact) - PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext, NULL); - //cemuLog_log(LogType::Force, "Inline func 0x{:08x} at {:08x}", jumpAddressDest, ppcImlGenContext->ppcAddressOfCurrentInstruction); - uint32* prevInstructionPtr = ppcImlGenContext->currentInstruction; - ppcImlGenContext->currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(jumpAddressDest); - PPCRecompiler_generateInlinedCode(ppcImlGenContext, jumpAddressDest, inlineFuncInstructionCount); - ppcImlGenContext->currentInstruction = prevInstructionPtr; - return true; - } - // generate funtion call instructions ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); return true; } // is jump destination within recompiled function? 
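// Note on the change below: the near/far branch decision now asks the function boundary tracker
// whether the target lies inside any range recorded for this function, instead of assuming one
// contiguous [ppcAddress, ppcAddress + ppcSize) window. A minimal sketch of such a membership
// test, using the startAddress/length fields this patch reads from GetRanges() elsewhere
// (the container name m_ranges is assumed for illustration only):
//   bool ContainsAddress(uint32 addr) const
//   {
//       for (const auto& r : m_ranges) // each tracked range covers [startAddress, startAddress + length)
//       {
//           if (addr >= r.startAddress && addr < r.startAddress + r.length)
//               return true;
//       }
//       return false;
//   }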
- if( jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize) ) + if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) { - // generate instruction - PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext, NULL, jumpAddressDest); + // jump to target within same function + PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); } else { - // todo: Inline this jump destination if possible (in many cases it's a bunch of GPR/FPR store instructions + BLR) ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); } return true; @@ -616,6 +675,8 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); @@ -661,11 +722,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) else if( crBit == 3 ) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } - // generate instruction - //ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; } return false; @@ -678,9 +738,9 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC); if( decrementerMustBeZero ) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); else - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); return true; } else @@ -688,8 +748,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) 
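// This hunk covers the non-decrementer path of BC: no CTR update is involved, so behaviour depends
// only on whether BO ignores the CR condition (ignoreCondition) or tests the CR bit selected by BI.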
if( ignoreCondition ) { // branch always, no condition and no decrementer - debugBreakpoint(); - crRegister = PPC_REC_INVALID_REGISTER; // not necessary but lets optimizer know we dont care for cr register on this instruction + // not supported + return false; } else { @@ -717,17 +777,20 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } - if (jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize)) + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, jumpCondition, crRegister, crBit, conditionMustBeTrue); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); } else { // far jump + debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet"); + return false; + PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); + //ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } } } @@ -736,6 +799,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); @@ -750,7 +815,7 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool ignoreCondition = (BO&16)!=0; bool saveLR = (opcode&PPC_OPC_LK)!=0; // since we skip this instruction if the condition is true, we need to invert the logic - bool invertedConditionMustBeTrue = !conditionMustBeTrue; + //bool invertedConditionMustBeTrue = !conditionMustBeTrue; if( useDecrementer ) { cemu_assert_debug(false); @@ -760,28 +825,37 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { if( ignoreCondition ) { - // store LR + // branch always, no condition and no decrementer check + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); if( saveLR ) { ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); } else { - // branch always, no condition and no decrementer ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } else { + cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); + + //debug_printf("[Rec-Disable] BCLR with condition or LR\n"); + //return false; + // store LR if( saveLR ) { + 
cemu_assert_unimplemented(); // todo - this is difficult to handle because it needs to jump to the unmodified LR (we should cache it in a register which we pass to the macro?) + return false; + uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); } // generate jump condition - if( invertedConditionMustBeTrue ) + if(conditionMustBeTrue) { if( crBit == 0 ) jumpCondition = PPCREC_JUMP_CONDITION_L; @@ -803,9 +877,17 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod else if( crBit == 3 ) jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; } - // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + + //if(conditionMustBeTrue) + // ppcImlGenContext->emitInst().make_debugbreak(ppcImlGenContext->ppcAddressOfCurrentInstruction); + + // write the BCTR instruction to a new segment that is set as a branch target for the current segment + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + + bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } return true; @@ -813,6 +895,8 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); @@ -826,6 +910,7 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 bool ignoreCondition = (BO&16)!=0; bool saveLR = (opcode&PPC_OPC_LK)!=0; + // since we skip this instruction if the condition is true, we need to invert the logic bool invertedConditionMustBeTrue = !conditionMustBeTrue; if( useDecrementer ) @@ -839,51 +924,63 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { if( ignoreCondition ) { - // store LR + // branch always, no condition and no decrementer if( saveLR ) { uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - 
ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); } + if (saveLR) + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); else - { - // branch always, no condition and no decrementer ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } } else { - // store LR - if( saveLR ) + // get jump condition + if (invertedConditionMustBeTrue) { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } - // generate jump condition - if( invertedConditionMustBeTrue ) - { - if( crBit == 0 ) + if (crBit == 0) jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) + else if (crBit == 1) jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) + else if (crBit == 2) jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) + else if (crBit == 3) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } else { - if( crBit == 0 ) + if (crBit == 0) jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) + else if (crBit == 1) jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) + else if (crBit == 2) jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) + else if (crBit == 3) jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; } + + // debug checks + //if (saveLR) + // cemu_assert_debug(ppcImlGenContext->currentBasicBlock->); + + // we always store LR + if (saveLR) + { + uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction + 4) & 0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + } + + // write the BCTR instruction to a new segment that is set as a branch target for the current segment + __debugbreak(); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + + //PPCBasicBlockInfo* bctrSeg = currentBasicBlock->Get + __debugbreak(); + + // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); @@ -2915,12 +3012,6 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont return v; } -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset) -{ - uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction + offset/4)); - return v; -} - uint32 PPCRecompiler_getCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext) { uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction)); @@ -3864,268 +3955,884 @@ bool 
PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) return unsupportedInstructionFound; } -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses) +// returns false if code flow is not interrupted +// continueDefaultPath: Controls if +bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget) { - ppcImlGenContext.functionRef = ppcRecFunc; - // add entire range - ppcRecRange_t recRange; - recRange.ppcAddress = ppcRecFunc->ppcAddress; - recRange.ppcSize = ppcRecFunc->ppcSize; - ppcRecFunc->list_ranges.push_back(recRange); - // process ppc instructions - ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); - bool unsupportedInstructionFound = false; - sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; - sint32 unsupportedInstructionCount = 0; - uint32 unsupportedInstructionLastOffset = 0; - uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; - uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; - - while(ppcImlGenContext.currentInstruction < endCurrentInstruction) + hasBranchTarget = false; + branchTarget = 0xFFFFFFFF; + makeNextInstEnterable = false; + continueDefaultPath = false; + switch (Espresso::GetPrimaryOpcode(opcode)) { - uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); - ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; - ppcImlGenContext.cyclesSinceLastBranch++; - ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); - if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) + case Espresso::PrimaryOpcode::VIRTUAL_HLE: + { + makeNextInstEnterable = true; + hasBranchTarget = false; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::BC: + { + uint32 BD, BI; + Espresso::BOField BO; + bool AA, LK; + Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK); + if (!LK) { - // add PPCEnter for addresses that are in entryAddresses - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); + hasBranchTarget = true; + branchTarget = (AA ? 
BD : BD) + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump } - else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) + makeNextInstEnterable = LK; + continueDefaultPath = true; + return true; + } + case Espresso::PrimaryOpcode::B: + { + uint32 LI; + bool AA, LK; + Espresso::decodeOp_B(opcode, LI, AA, LK); + if (!LK) { - // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional jump instructions without jump goal) - uint32 opcodeCurrent = PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); - uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); - if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) - { - // between two B(L) instructions - // todo: for BL only if they are not inlineable - - bool canInlineFunction = false; - if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) - { - uint32 li; - PPC_OPC_TEMPL_I(opcodePrevious, li); - sint32 inlineSize = 0; - if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) - canInlineFunction = true; - } - if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) - { - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); - if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) - { - // after unconditional BCTR instruction - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - } + hasBranchTarget = true; + branchTarget = AA ? LI : LI + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump } - - unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); - if( unsupportedInstructionFound ) + makeNextInstEnterable = LK; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::GROUP_19: + switch (Espresso::GetGroup19Opcode(opcode)) + { + //case Espresso::Opcode19::BCLR: + ////case Espresso::Opcode19::BCCTR: + //{ + // continueDefaultPath = false; // todo - set this to true if this instruction has a condition (including decrementer check) + // makeNextInstEnterable = Espresso::DecodeLK(opcode); + // return true; + //} + + case Espresso::Opcode19::BCLR: + case Espresso::Opcode19::BCCTR: { - unsupportedInstructionCount++; - unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; - unsupportedInstructionFound = false; - //break; + Espresso::BOField BO; + uint32 BI; + bool LK; + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); + continueDefaultPath = !BO.conditionIgnore() || !BO.decrementerIgnore(); // if branch is always taken then there is no continued path + makeNextInstEnterable = Espresso::DecodeLK(opcode); + return true; } + default: + break; + } + break; + case Espresso::PrimaryOpcode::GROUP_31: + switch (Espresso::GetGroup31Opcode(opcode)) + { + //case Espresso::Opcode31::TW: + // continueDefaultPath = true; + // return true; + //case Espresso::Opcode31::MFTB: + // continueDefaultPath = true; + // return true; + //case Espresso::Opcode19::BCLR: + //case Espresso::Opcode19::BCCTR: + //{ + // continueDefaultPath = false; + // makeNextInstEnterable = Espresso::DecodeLK(opcode); + // return true; + //} + default: + break; + } + break; + default: + break; } - 
ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) - if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) - { - debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); - return false; - } - // optimize unused jumpmarks away - // first, flag all jumpmarks as unused - std::map map_jumpMarks; - for(sint32 i=0; i& basicBlockList, PPCFunctionBoundaryTracker& boundaryTracker, uint32 ppcStart, uint32 ppcEnd, const std::set& combinedBranchTargets, const std::set& entryAddresses) +{ + cemu_assert_debug(ppcStart <= ppcEnd); + + uint32 currentAddr = ppcStart; + + PPCBasicBlockInfo* curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + + uint32 basicBlockStart = currentAddr; + while (currentAddr <= ppcEnd) { - if( ppcImlGenContext.imlList[i].type == PPCREC_IML_TYPE_JUMPMARK ) + curBlockInfo->lastAddress = currentAddr; + uint32 opcode = memory_readU32(currentAddr); + bool nextInstIsEnterable = false; + bool hasBranchTarget = false; + bool hasContinuedFlow = false; + uint32 branchTarget = 0; + if (PPCRecompiler_CheckIfInstructionEndsSegment(boundaryTracker, currentAddr, opcode, nextInstIsEnterable, hasContinuedFlow, hasBranchTarget, branchTarget)) { - ppcImlGenContext.imlList[i].op_jumpmark.flags |= PPCREC_IML_OP_FLAG_UNUSED; -#ifdef CEMU_DEBUG_ASSERT - if (map_jumpMarks.find(ppcImlGenContext.imlList[i].op_jumpmark.address) != map_jumpMarks.end()) - assert_dbg(); -#endif - map_jumpMarks.emplace(ppcImlGenContext.imlList[i].op_jumpmark.address, ppcImlGenContext.imlList+i); + curBlockInfo->hasBranchTarget = hasBranchTarget; + curBlockInfo->branchTarget = branchTarget; + curBlockInfo->hasContinuedFlow = hasContinuedFlow; + // start new basic block, except if this is the last instruction + if (currentAddr >= ppcEnd) + break; + curBlockInfo = &basicBlockList.emplace_back(currentAddr + 4, entryAddresses); + curBlockInfo->isEnterable = curBlockInfo->isEnterable || nextInstIsEnterable; + currentAddr += 4; + continue; + } + currentAddr += 4; + if (currentAddr <= ppcEnd) + { + if (combinedBranchTargets.find(currentAddr) != combinedBranchTargets.end()) + { + // instruction is branch target, start new basic block + curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + } } + } - // second, unflag jumpmarks that have at least one reference - for(sint32 i=0; i PPCRecompiler_DetermineBasicBlockRange(PPCFunctionBoundaryTracker& boundaryTracker, const std::set& entryAddresses) +{ + cemu_assert(!entryAddresses.empty()); + std::vector basicBlockList; + + const std::set branchTargets = boundaryTracker.GetBranchTargets(); + auto funcRanges = boundaryTracker.GetRanges(); + + std::set combinedBranchTargets = branchTargets; + combinedBranchTargets.insert(entryAddresses.begin(), entryAddresses.end()); + + for (auto& funcRangeIt : funcRanges) + PPCRecompiler_DetermineBasicBlockRange(basicBlockList, boundaryTracker, funcRangeIt.startAddress, funcRangeIt.startAddress + funcRangeIt.length - 4, combinedBranchTargets, entryAddresses); + + // mark all segments that start at entryAddresses as enterable (debug code for verification, can be removed) + size_t numMarkedEnterable = 0; + for (auto& basicBlockIt : basicBlockList) { - if( ppcImlGenContext.imlList[i].type == PPCREC_IML_TYPE_CJUMP ) + if (entryAddresses.find(basicBlockIt.startAddress) != entryAddresses.end()) { - uint32 jumpDest = 
ppcImlGenContext.imlList[i].op_conditionalJump.jumpmarkAddress; - auto jumpMarkIml = map_jumpMarks.find(jumpDest); - if (jumpMarkIml != map_jumpMarks.end()) - jumpMarkIml->second->op_jumpmark.flags &= ~PPCREC_IML_OP_FLAG_UNUSED; + cemu_assert_debug(basicBlockIt.isEnterable); + numMarkedEnterable++; } } - // lastly, remove jumpmarks that still have the unused flag set - sint32 currentImlIndex = 0; - for(sint32 i=0; iGetBranchNotTaken() +IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + //IMLSegment* continuedSegment = ppcImlGenContext.NewSegment(); + IMLSegment* continuedSegment = ppcImlGenContext.InsertSegment(ppcImlGenContext.GetSegmentIndex(writeSegment) + 1); + + continuedSegment->SetLinkBranchTaken(writeSegment->GetBranchTaken()); + continuedSegment->SetLinkBranchNotTaken(writeSegment->GetBranchNotTaken()); + + writeSegment->SetLinkBranchNotTaken(continuedSegment); + writeSegment->SetLinkBranchTaken(nullptr); + + if (ppcImlGenContext.currentOutputSegment == writeSegment) + ppcImlGenContext.currentOutputSegment = continuedSegment; + + cemu_assert_debug(basicBlockInfo.appendSegment == writeSegment); + basicBlockInfo.appendSegment = continuedSegment; + + return writeSegment; +} + +// generates a new segment and sets it as branch target for the current write segment. Returns the created segment +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + IMLSegment* branchTargetSegment = ppcImlGenContext.NewSegment(); + cemu_assert_debug(!writeSegment->GetBranchTaken()); // must not have a target already + writeSegment->SetLinkBranchTaken(branchTargetSegment); + return branchTargetSegment; +} + +// verify that current instruction is the last instruction of the active basic block +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext) +{ + cemu_assert_debug(ppcImlGenContext.currentBasicBlock->lastAddress == ppcImlGenContext.ppcAddressOfCurrentInstruction); +} + +void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* imlSegment = basicBlockInfo.GetFirstSegmentInChain(); + //if (imlSegment->imlList.empty()) + // return; + //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) + // return; + //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) + // return; + if (!basicBlockInfo.hasBranchTarget) + return; + if (basicBlockInfo.branchTarget >= basicBlockInfo.startAddress) + return; + + // exclude non-infinite tight loops + if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) + return; + // potential loop segment found, split this segment into four: + // P0: This segment checks if the remaining cycles counter is still above zero. 
If yes, it jumps to segment P2 (it's also the jump destination for other segments) + // P1: This segment contains the ppc_leave instruction + // P2: This segment contains the iml instructions of the original segment + // PEntry: This segment is used to enter the function, it jumps to P0 + // All segments are considered to be part of the same PPC instruction range + // The first segment also retains the jump destination and enterable properties from the original segment. + //debug_printf("--- Insert cycle counter check ---\n"); + + + // make the segment enterable so execution can return after checking + basicBlockInfo.GetFirstSegmentInChain()->SetEnterable(basicBlockInfo.startAddress); + + IMLSegment* splitSeg = PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext, basicBlockInfo); + + // what we know about the crash: + // It doesnt happen with cycle checks disabled + // The debugbreak emitted here is only encountered twice before it crashes + // it doesnt seem to go into the alternative branch (cycles negative) -> tested (debugbreak in exit segment doesnt trigger) + // Its the enterable segment that causes issues? -> I removed the enterable statement and it still happened + // Maybe some general issue with getting x64 offsets for enterable segments.. + + // possible explanations: + // issue with the cycle check / exit logic + // returning from exit is causing the issue + // Segments can get marked as jump destination which we no longer do -> Deleted old code and added asserts + + IMLInstruction* inst = splitSeg->AppendInstruction(); + inst->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + inst->operation = 0; + inst->crRegister = PPC_REC_INVALID_REGISTER; + inst->op_conditionalJump.jumpmarkAddress = 0xFFFFFFFF; + inst->associatedPPCAddress = 0xFFFFFFFF; + // PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK + + //splitSeg->AppendInstruction()->make_macro(PPCREC_IML_TYPE_MACRO, ) + + IMLSegment* exitSegment = ppcImlGenContext.NewSegment(); + splitSeg->SetLinkBranchTaken(exitSegment); + + + //exitSegment->AppendInstruction()->make_debugbreak(); + + inst = exitSegment->AppendInstruction();// ->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress); + inst->type = PPCREC_IML_TYPE_MACRO; + inst->operation = PPCREC_IML_MACRO_LEAVE; + inst->crRegister = PPC_REC_INVALID_REGISTER; + inst->op_macro.param = basicBlockInfo.startAddress; + inst->associatedPPCAddress = basicBlockInfo.startAddress; + + + //debug_printf("----------------------------------------\n"); + //IMLDebug_Dump(&ppcImlGenContext); + //__debugbreak(); + + //ppcImlGenContext.NewSegment(); + + //PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); + //imlSegment = NULL; + //IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; + //IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; + //IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; + //// create entry point segment + //PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + //IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; + //// relink segments + //IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); + //IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + //IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); + //IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); + //// update segments + //uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; + //if (imlSegmentP2->isEnterable) + // enterPPCAddress = 
imlSegmentP2->enterPPCAddress; + //imlSegmentP0->ppcAddress = 0xFFFFFFFF; + //imlSegmentP1->ppcAddress = 0xFFFFFFFF; + //imlSegmentP2->ppcAddress = 0xFFFFFFFF; + //cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); + //// move segment properties from segment P2 to segment P0 + //imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; + //imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; + //imlSegmentP0->isEnterable = false; + ////imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; + //imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; + //imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; + //imlSegmentP2->isJumpDestination = false; + //imlSegmentP2->jumpDestinationPPCAddress = 0; + //imlSegmentP2->isEnterable = false; + //imlSegmentP2->enterPPCAddress = 0; + //imlSegmentP2->ppcAddrMin = 0; + //imlSegmentP2->ppcAddrMax = 0; + //// setup enterable segment + //if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) + //{ + // imlSegmentPEntry->isEnterable = true; + // imlSegmentPEntry->ppcAddress = enterPPCAddress; + // imlSegmentPEntry->enterPPCAddress = enterPPCAddress; + //} + //// assign new jumpmark to segment P2 + //imlSegmentP2->isJumpDestination = true; + //imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; + //currentLoopEscapeJumpMarker++; + //// create ppc_leave instruction in segment P1 + //PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); + //imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; + //imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; + //imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + //imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; + //imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + //// create cycle-based conditional instruction in segment P0 + //PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); + //imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + //imlSegmentP0->imlList[0].operation = 0; + //imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + //imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; + //imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + //// jump instruction for PEntry + //PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); + //PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); +} + +void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext) +{ + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + bool isLastSegment = segIt == ppcImlGenContext.segmentList2.back(); + //IMLSegment* nextSegment = isLastSegment ? 
nullptr : ppcImlGenContext->segmentList2[s + 1]; + // handle empty segment + if (segIt->imlList.empty()) { - memcpy(ppcImlGenContext.imlList+currentImlIndex, ppcImlGenContext.imlList+i, sizeof(IMLInstruction)); + cemu_assert_debug(segIt->GetBranchNotTaken()); + continue; } - currentImlIndex++; - } - // fix intermediate instruction count - ppcImlGenContext.imlListCount = currentImlIndex; - // divide iml instructions into segments - // each segment is defined by one or more instructions with no branches or jump destinations in between - // a branch instruction may only be the very last instruction of a segment - cemu_assert_debug(ppcImlGenContext.segmentList2.empty()); - - sint32 segmentStart = 0; - sint32 segmentImlIndex = 0; - while( segmentImlIndex < ppcImlGenContext.imlListCount ) - { - bool genNewSegment = false; - // segment definition: - // If we encounter a branch instruction -> end of segment after current instruction - // If we encounter a jumpmark -> end of segment before current instruction - // If we encounter ppc_enter -> end of segment before current instruction - if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) + // check last instruction of segment + IMLInstruction* imlInstruction = segIt->GetLastInstruction(); + if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - // segment ends after current instruction - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart+1; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex+1; + cemu_assert_debug(segIt->GetBranchTaken()); + if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + { + cemu_assert_debug(segIt->GetBranchNotTaken()); + } + + //// find destination segment by ppc jump address + //IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); + //if (jumpDestSegment) + //{ + // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + // IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); + //} + //else + //{ + 
// imlSegment->nextSegmentIsUncertain = true; + //} } - else if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || - ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) + else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) { - // segment ends before current instruction - if( segmentImlIndex > segmentStart ) + auto macroType = imlInstruction->operation; + switch (macroType) { - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; + case PPCREC_IML_MACRO_BLR: + case PPCREC_IML_MACRO_BLRL: + case PPCREC_IML_MACRO_BCTR: + case PPCREC_IML_MACRO_BCTRL: + case PPCREC_IML_MACRO_BL: + case PPCREC_IML_MACRO_B_FAR: + case PPCREC_IML_MACRO_HLE: + case PPCREC_IML_MACRO_LEAVE: + segIt->nextSegmentIsUncertain = true; + break; + case PPCREC_IML_MACRO_DEBUGBREAK: + case PPCREC_IML_MACRO_COUNT_CYCLES: + case PPCREC_IML_MACRO_MFTB: + break; + default: + cemu_assert_unimplemented(); } } - segmentImlIndex++; } - if( segmentImlIndex != segmentStart ) +} + +bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunctionBoundaryTracker& boundaryTracker, std::set& entryAddresses) +{ + std::vector basicBlockList = PPCRecompiler_DetermineBasicBlockRange(boundaryTracker, entryAddresses); + + // create segments + std::unordered_map addrToBB; + ppcImlGenContext.segmentList2.resize(basicBlockList.size()); + for (size_t i = 0; i < basicBlockList.size(); i++) { - // final segment - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = new IMLSegment(); + seg->ppcAddress = basicBlockInfo.startAddress; + if(basicBlockInfo.isEnterable) + seg->SetEnterable(basicBlockInfo.startAddress); + ppcImlGenContext.segmentList2[i] = seg; + cemu_assert_debug(addrToBB.find(basicBlockInfo.startAddress) == addrToBB.end()); + basicBlockInfo.SetInitialSegment(seg); + addrToBB.emplace(basicBlockInfo.startAddress, &basicBlockInfo); } - // move iml instructions into the segments - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + // link segments + for (size_t i = 0; i < basicBlockList.size(); i++) { - uint32 imlStartIndex = segIt->startOffset; - uint32 imlCount = segIt->count; - if( imlCount > 0 ) + PPCBasicBlockInfo& bbInfo = basicBlockList[i]; + cemu_assert_debug(bbInfo.GetFirstSegmentInChain() == bbInfo.GetSegmentForInstructionAppend()); + IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + if (bbInfo.hasBranchTarget) { - cemu_assert_debug(segIt->imlList.empty()); - segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); - + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.branchTarget]; + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchTaken(seg, targetBB->GetFirstSegmentInChain()); } - else + if (bbInfo.hasContinuedFlow) { - // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code - cemu_assert_debug(segIt->imlList.empty()); + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.lastAddress + 4]; + if (!targetBB) + { + cemuLog_log(LogType::Recompiler, 
"Recompiler was unable to link segment [0x{:08x}-0x{:08x}] to 0x{:08x}", bbInfo.startAddress, bbInfo.lastAddress, bbInfo.lastAddress + 4); + return false; + } + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchNotTaken(seg, targetBB->GetFirstSegmentInChain()); } - segIt->startOffset = 9999999; - segIt->count = 9999999; - } - // clear segment-independent iml list - free(ppcImlGenContext.imlList); - ppcImlGenContext.imlList = nullptr; - ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList - // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + } + // we assume that all unreachable segments are potentially enterable + // todo - mark them as such + + + // generate cycle counters + // in theory we could generate these as part of FillBasicBlock() but in the future we might use more complex logic to emit fewer operations + for (size_t i = 0; i < basicBlockList.size(); i++) { - uint32 segmentPPCAddrMin = 0xFFFFFFFF; - uint32 segmentPPCAddrMax = 0x00000000; - for(sint32 i=0; i< segIt->imlList.size(); i++) + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = basicBlockInfo.GetSegmentForInstructionAppend(); + + uint32 ppcInstructionCount = (basicBlockInfo.lastAddress - basicBlockInfo.startAddress + 4) / 4; + cemu_assert_debug(ppcInstructionCount > 0); + + PPCRecompiler_pushBackIMLInstructions(seg, 0, 1); + seg->imlList[0].type = PPCREC_IML_TYPE_MACRO; + seg->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + seg->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + seg->imlList[0].op_macro.param = ppcInstructionCount; + } + + // generate cycle check instructions + // note: Introduces new segments + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext, basicBlockInfo); + } + + // fill in all the basic blocks + // note: This step introduces new segments as is necessary for some instructions + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + ppcImlGenContext.currentBasicBlock = &basicBlockInfo; + if (!PPCIMLGen_FillBasicBlock(ppcImlGenContext, basicBlockInfo)) + return false; + ppcImlGenContext.currentBasicBlock = nullptr; + } + + // mark segments with unknown jump destination (e.g. BLR and most macros) + PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext); + + // debug - check segment graph +#ifdef CEMU_DEBUG_ASSERT + //for (size_t i = 0; i < basicBlockList.size(); i++) + //{ + // IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + // if (seg->list_prevSegments.empty()) + // { + // cemu_assert_debug(seg->isEnterable); + // } + //} + // debug - check if suffix instructions are at the end of segments and if they are present for branching segments + for (size_t segIndex = 0; segIndex < ppcImlGenContext.segmentList2.size(); segIndex++) + { + IMLSegment* seg = ppcImlGenContext.segmentList2[segIndex]; + IMLSegment* nextSeg = (segIndex+1) < ppcImlGenContext.segmentList2.size() ? 
ppcImlGenContext.segmentList2[segIndex + 1] : nullptr; + + if (seg->imlList.size() > 0) { - if(segIt->imlList[i].associatedPPCAddress == 0 ) - continue; - //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) - // continue; // jumpmarks and no-op instructions must not affect segment ppc address range - segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); - segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); + for (size_t f = 0; f < seg->imlList.size() - 1; f++) + { + if (seg->imlList[f].IsSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (Suffix instruction at wrong pos in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + __debugbreak(); + } + } } - if( segmentPPCAddrMin != 0xFFFFFFFF ) + if (seg->nextSegmentBranchTaken) { - segIt->ppcAddrMin = segmentPPCAddrMin; - segIt->ppcAddrMax = segmentPPCAddrMax; + if (!seg->HasSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (NoSuffixInstruction in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + __debugbreak(); + } } - else + if (seg->nextSegmentBranchNotTaken) { - segIt->ppcAddrMin = 0; - segIt->ppcAddrMax = 0; + // if branch not taken, flow must continue to next segment in sequence + cemu_assert_debug(seg->nextSegmentBranchNotTaken == nextSeg); } - } - // certain instructions can change the segment state - // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) - // jumpmarks mark the segment as a jump destination (within the same function) - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - while (segIt->imlList.size() > 0) + // more detailed checks based on actual suffix instruction + if (seg->imlList.size() > 0) { - if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) + IMLInstruction* inst = seg->GetLastInstruction(); + if (inst->type == PPCREC_IML_TYPE_MACRO && inst->op_macro.param == PPCREC_IML_MACRO_B_FAR) { - // mark segment as enterable - if (segIt->isEnterable) - assert_dbg(); // should not happen? - segIt->isEnterable = true; - segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; - // remove ppc_enter instruction - segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].associatedPPCAddress = 0; + cemu_assert_debug(!seg->GetBranchTaken()); + cemu_assert_debug(!seg->GetBranchNotTaken()); } - else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) + if (inst->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - // mark segment as jump destination - if(segIt->isJumpDestination ) - assert_dbg(); // should not happen? 
- segIt->isJumpDestination = true; - segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; - // remove jumpmark instruction - segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].associatedPPCAddress = 0; + cemu_assert_debug(seg->GetBranchTaken()); + cemu_assert_debug(seg->GetBranchNotTaken()); + } + if (inst->type == PPCREC_IML_TYPE_CJUMP) + { + if (inst->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + { + if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) + { + debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + cemu_assert_error(); + } + } + else + { + // proper error checking for branch-always (or branch-never if invert bit is set) + } } - else - break; } + //if (seg->list_prevSegments.empty()) + //{ + // cemu_assert_debug(seg->isEnterable); + //} + segIndex++; } - // the first segment is always enterable as the recompiled functions entrypoint - ppcImlGenContext.segmentList2[0]->isEnterable = true; - ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; +#endif - // link segments for further inter-segment optimization - PPCRecompilerIML_linkSegments(&ppcImlGenContext); + + // todos: + // - basic block determination should look for the B(L) B(L) pattern. Or maybe just mark every bb without any input segments as an entry segment + + return true; +} + +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) +{ + ppcImlGenContext.functionRef = ppcRecFunc; // todo - remove this and replace internally with boundary tracker + ppcImlGenContext.boundaryTracker = &boundaryTracker; + + if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) + return false; + + // add entire range + ppcRecRange_t recRange; + recRange.ppcAddress = ppcRecFunc->ppcAddress; + recRange.ppcSize = ppcRecFunc->ppcSize; + ppcRecFunc->list_ranges.push_back(recRange); + // process ppc instructions +// ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); +// bool unsupportedInstructionFound = false; +// sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; +// sint32 unsupportedInstructionCount = 0; +// uint32 unsupportedInstructionLastOffset = 0; +// uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; +// uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; +// +// while(ppcImlGenContext.currentInstruction < endCurrentInstruction) +// { +// uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); +// ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; +// ppcImlGenContext.cyclesSinceLastBranch++; +// ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); +// if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) +// { +// // add PPCEnter for addresses that are in entryAddresses +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) +// { +// // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional jump instructions without jump goal) +// uint32 opcodeCurrent = 
PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); +// uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); +// if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) +// { +// // between two B(L) instructions +// // todo: for BL only if they are not inlineable +// +// bool canInlineFunction = false; +// if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) +// { +// uint32 li; +// PPC_OPC_TEMPL_I(opcodePrevious, li); +// sint32 inlineSize = 0; +// if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) +// canInlineFunction = true; +// } +// if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) +// { +// uint32 BO, BI, BD; +// PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); +// if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) +// { +// // after unconditional BCTR instruction +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// } +// } +// +// unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); +// if( unsupportedInstructionFound ) +// { +// unsupportedInstructionCount++; +// unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; +// unsupportedInstructionFound = false; +// //break; +// } +// } +// ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) +// if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) +// { +// debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); +// return false; +// } +// // optimize unused jumpmarks away +// // first, flag all jumpmarks as unused +// std::map map_jumpMarks; +// for(sint32 i=0; isecond->op_jumpmark.flags &= ~PPCREC_IML_OP_FLAG_UNUSED; +// } +// } +// // lastly, remove jumpmarks that still have the unused flag set +// sint32 currentImlIndex = 0; +// for(sint32 i=0; i end of segment after current instruction +// // If we encounter a jumpmark -> end of segment before current instruction +// // If we encounter ppc_enter -> end of segment before current instruction +// if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || +// 
(ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) +// { +// // segment ends after current instruction +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart+1; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex+1; +// } +// else if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || +// ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) +// { +// // segment ends before current instruction +// if( segmentImlIndex > segmentStart ) +// { +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex; +// } +// } +// segmentImlIndex++; +// } +// if( segmentImlIndex != segmentStart ) +// { +// // final segment +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex; +// } +// // move iml instructions into the segments +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// uint32 imlStartIndex = segIt->startOffset; +// uint32 imlCount = segIt->count; +// if( imlCount > 0 ) +// { +// cemu_assert_debug(segIt->imlList.empty()); +// segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); +// +// } +// else +// { +// // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code +// cemu_assert_debug(segIt->imlList.empty()); +// } +// segIt->startOffset = 9999999; +// segIt->count = 9999999; +// } +// // clear segment-independent iml list +// free(ppcImlGenContext.imlList); +// ppcImlGenContext.imlList = nullptr; +// ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList +// // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// uint32 segmentPPCAddrMin = 0xFFFFFFFF; +// uint32 segmentPPCAddrMax = 0x00000000; +// for(sint32 i=0; i< segIt->imlList.size(); i++) +// { +// if(segIt->imlList[i].associatedPPCAddress == 0 ) +// continue; +// //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) +// // continue; // jumpmarks and no-op instructions must not affect segment ppc address range +// segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); +// segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); +// } +// if( segmentPPCAddrMin != 0xFFFFFFFF ) +// { +// segIt->ppcAddrMin = segmentPPCAddrMin; +// segIt->ppcAddrMax = segmentPPCAddrMax; +// } +// else +// { +// segIt->ppcAddrMin = 0; +// segIt->ppcAddrMax = 0; +// } +// } +// // certain instructions can change the segment state +// // ppcEnter instruction marks a segment as 
enterable (BL, BCTR, etc. instructions can enter at this location from outside) +// // jumpmarks mark the segment as a jump destination (within the same function) +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// while (segIt->imlList.size() > 0) +// { +// if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) +// { +// // mark segment as enterable +// if (segIt->isEnterable) +// assert_dbg(); // should not happen? +// segIt->isEnterable = true; +// segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; +// // remove ppc_enter instruction +// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; +// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; +// segIt->imlList[0].associatedPPCAddress = 0; +// } +// else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) +// { +// // mark segment as jump destination +// if(segIt->isJumpDestination ) +// assert_dbg(); // should not happen? +// segIt->isJumpDestination = true; +// segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; +// // remove jumpmark instruction +// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; +// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; +// segIt->imlList[0].associatedPPCAddress = 0; +// } +// else +// break; +// } +// } +// // the first segment is always enterable as the recompiled functions entrypoint +// ppcImlGenContext.segmentList2[0]->isEnterable = true; +// ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; +// +// // link segments for further inter-segment optimization +// PPCRecompilerIML_linkSegments(&ppcImlGenContext); // optimization pass - replace segments with conditional MOVs if possible for (IMLSegment* segIt : ppcImlGenContext.segmentList2) @@ -4215,129 +4922,132 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert cycle counter instruction in every segment that has a cycle count greater zero - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if( segIt->ppcAddrMin == 0 ) - continue; - // count number of PPC instructions in segment - // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions - uint32 lastPPCInstAddr = 0; - uint32 ppcCount2 = 0; - for (sint32 i = 0; i < segIt->imlList.size(); i++) - { - if (segIt->imlList[i].associatedPPCAddress == 0) - continue; - if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) - continue; - lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; - ppcCount2++; - } - //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions - uint32 cycleCount = ppcCount2;// ppcCount / 4; - if( cycleCount > 0 ) - { - PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); - segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - segIt->imlList[0].op_macro.param = cycleCount; - } - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if( segIt->ppcAddrMin == 0 ) + // continue; + // // count number of PPC instructions in segment + // // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions + // uint32 lastPPCInstAddr = 0; + // uint32 ppcCount2 = 0; + // for (sint32 i = 0; i < segIt->imlList.size(); i++) + // { + // if 
(segIt->imlList[i].associatedPPCAddress == 0) + // continue; + // if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) + // continue; + // lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; + // ppcCount2++; + // } + // //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions + // uint32 cycleCount = ppcCount2;// ppcCount / 4; + // if( cycleCount > 0 ) + // { + // PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); + // segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; + // segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + // segIt->imlList[0].op_macro.param = cycleCount; + // } + //} return true; } void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext) { - // find segments that have a (conditional) jump instruction that points in reverse direction of code flow - // for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. - // todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) - uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located - for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++) - { - // todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) - IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; - if (imlSegment->imlList.empty()) - continue; - if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) - continue; - if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) - continue; - // exclude non-infinite tight loops - if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) - continue; - // potential loop segment found, split this segment into four: - // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) - // P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here. - // P2: This segment contains the iml instructions of the original segment - // PEntry: This segment is used to enter the function, it jumps to P0 - // All segments are considered to be part of the same PPC instruction range - // The first segment also retains the jump destination and enterable properties from the original segment. 
- //debug_printf("--- Insert cycle counter check ---\n"); - - PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); - imlSegment = NULL; - IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; - IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; - IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; - // create entry point segment - PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); - IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; - // relink segments - IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); - IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); - IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); - // update segments - uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; - if (imlSegmentP2->isEnterable) - enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddress = 0xFFFFFFFF; - imlSegmentP1->ppcAddress = 0xFFFFFFFF; - imlSegmentP2->ppcAddress = 0xFFFFFFFF; - cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); - // move segment properties from segment P2 to segment P0 - imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; - imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->isEnterable = false; - //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; - imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; - imlSegmentP2->isJumpDestination = false; - imlSegmentP2->jumpDestinationPPCAddress = 0; - imlSegmentP2->isEnterable = false; - imlSegmentP2->enterPPCAddress = 0; - imlSegmentP2->ppcAddrMin = 0; - imlSegmentP2->ppcAddrMax = 0; - // setup enterable segment - if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) - { - imlSegmentPEntry->isEnterable = true; - imlSegmentPEntry->ppcAddress = enterPPCAddress; - imlSegmentPEntry->enterPPCAddress = enterPPCAddress; - } - // assign new jumpmark to segment P2 - imlSegmentP2->isJumpDestination = true; - imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; - currentLoopEscapeJumpMarker++; - // create ppc_leave instruction in segment P1 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); - imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; - imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; - imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; - imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // create cycle-based conditional instruction in segment P0 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); - imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - imlSegmentP0->imlList[0].operation = 0; - imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // jump instruction for PEntry - PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); - - // skip the newly created segments - s += 2; - } + return; // deprecated + + //// find segments that have a (conditional) jump instruction that points 
in reverse direction of code flow + //// for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. + //// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) + //uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located + //for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++) + //{ + // // todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) + // IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; + // if (imlSegment->imlList.empty()) + // continue; + // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) + // continue; + // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) + // continue; + + // // exclude non-infinite tight loops + // if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) + // continue; + // // potential loop segment found, split this segment into four: + // // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) + // // P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here. + // // P2: This segment contains the iml instructions of the original segment + // // PEntry: This segment is used to enter the function, it jumps to P0 + // // All segments are considered to be part of the same PPC instruction range + // // The first segment also retains the jump destination and enterable properties from the original segment. 
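(Editor's illustration, not part of the patch: the four-way split described in the comment above wires the segments roughly as sketched below, with P0 taking over all former inputs of P2; names follow the comment, the diagram itself is an assumption drawn from the relink calls further down.)

    // sketch of the control flow produced by the (now deprecated) loop check split
    //   former inputs of P2 --> P0 <-- PEntry (function entry point)
    //   P0: CJUMP_CYCLE_CHECK
    //       branch taken    (cycles remaining) --> P2 (original segment body)
    //       branch not taken (cycles used up)  --> P1 (PPCREC_IML_MACRO_LEAVE, exit to scheduler)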
+ // //debug_printf("--- Insert cycle counter check ---\n"); + + // PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); + // imlSegment = NULL; + // IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; + // IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; + // IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; + // // create entry point segment + // PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + // IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; + // // relink segments + // IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); + // IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + // IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); + // IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); + // // update segments + // uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; + // if (imlSegmentP2->isEnterable) + // enterPPCAddress = imlSegmentP2->enterPPCAddress; + // imlSegmentP0->ppcAddress = 0xFFFFFFFF; + // imlSegmentP1->ppcAddress = 0xFFFFFFFF; + // imlSegmentP2->ppcAddress = 0xFFFFFFFF; + // cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); + // // move segment properties from segment P2 to segment P0 + // imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; + // imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; + // imlSegmentP0->isEnterable = false; + // //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; + // imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; + // imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; + // imlSegmentP2->isJumpDestination = false; + // imlSegmentP2->jumpDestinationPPCAddress = 0; + // imlSegmentP2->isEnterable = false; + // imlSegmentP2->enterPPCAddress = 0; + // imlSegmentP2->ppcAddrMin = 0; + // imlSegmentP2->ppcAddrMax = 0; + // // setup enterable segment + // if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) + // { + // imlSegmentPEntry->isEnterable = true; + // imlSegmentPEntry->ppcAddress = enterPPCAddress; + // imlSegmentPEntry->enterPPCAddress = enterPPCAddress; + // } + // // assign new jumpmark to segment P2 + // imlSegmentP2->isJumpDestination = true; + // imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; + // currentLoopEscapeJumpMarker++; + // // create ppc_leave instruction in segment P1 + // PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); + // imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; + // imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; + // imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; + // imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + // // create cycle-based conditional instruction in segment P0 + // PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); + // imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + // imlSegmentP0->imlList[0].operation = 0; + // imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; + // imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + // // jump instruction for PEntry + // PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); + // PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, 
imlSegmentPEntry->imlList.data() + 0); + + // // skip the newly created segments + // s += 2; + //} } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index ccb0fc83f..7b4b94fbb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -3,63 +3,67 @@ IMLSegment* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) { - for(IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) - { - return segIt; - } - } - debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); + __debugbreak(); return nullptr; + //for(IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) + // { + // return segIt; + // } + //} + //debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); + //return nullptr; } void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) { - size_t segCount = ppcImlGenContext->segmentList2.size(); - for(size_t s=0; ssegmentList2[s]; + __debugbreak(); // outdated - bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); - IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; - // handle empty segment - if( imlSegment->imlList.empty()) - { - if (isLastSegment == false) - IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment - else - imlSegment->nextSegmentIsUncertain = true; - continue; - } - // check last instruction of segment - IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); - if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // find destination segment by ppc jump address - IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); - if( jumpDestSegment ) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); - } - else - { - imlSegment->nextSegmentIsUncertain = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - // currently we assume that the next segment is unknown for all macros - imlSegment->nextSegmentIsUncertain = true; - } - else - { - // all other instruction types do not branch - IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - } - } + //size_t segCount = ppcImlGenContext->segmentList2.size(); + //for(size_t s=0; ssegmentList2[s]; + + // bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); + // IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; + // // handle empty segment + // if( imlSegment->imlList.empty()) + // { + // if (isLastSegment == false) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment + // else + // imlSegment->nextSegmentIsUncertain = true; + // continue; + // } + // // check last instruction of segment + // 
IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); + // if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) + // { + // // find destination segment by ppc jump address + // IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); + // if( jumpDestSegment ) + // { + // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + // IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); + // } + // else + // { + // imlSegment->nextSegmentIsUncertain = true; + // } + // } + // else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) + // { + // // currently we assume that the next segment is unknown for all macros + // imlSegment->nextSegmentIsUncertain = true; + // } + // else + // { + // // all other instruction types do not branch + // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + // } + //} } void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext) diff --git a/src/Cemu/Logging/CemuLogging.h b/src/Cemu/Logging/CemuLogging.h index a671ce51b..81b018f5a 100644 --- a/src/Cemu/Logging/CemuLogging.h +++ b/src/Cemu/Logging/CemuLogging.h @@ -39,7 +39,6 @@ enum class LogType : sint32 NN_SL = 26, TextureReadback = 29, - ProcUi = 39, nlibcurl = 41, @@ -47,6 +46,7 @@ enum class LogType : sint32 NFC = 41, NTAG = 42, + Recompiler = 60, }; template <> From a5f6faac8a612b2cc26473f7c4df20b568567b7a Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 12 Dec 2022 10:48:44 +0100 Subject: [PATCH 13/64] PPCRec: Fix merge conflicts --- .../Recompiler/BackendX64/BackendX64.cpp | 98 ++----------------- .../Recompiler/BackendX64/BackendX64.h | 7 -- .../Recompiler/BackendX64/BackendX64FPU.cpp | 25 ++--- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 35 +++++++ 4 files changed, 57 insertions(+), 108 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 1267be79a..5b3f0830f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -8,11 +8,6 @@ #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" -bool s_hasLZCNTSupport = false; -bool s_hasMOVBESupport = false; -bool s_hasBMI2Support = false; -bool s_hasAVXSupport = false; - sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping { REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX @@ -374,7 +369,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p { x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2); } - if( IMLBackendX64_HasExtensionMOVBE() && switchEndian ) + if( g_CPUFeatures.x86.movbe && switchEndian ) { if (indexed) { @@ -412,7 +407,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p { x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - if(IMLBackendX64_HasExtensionMOVBE() && switchEndian ) + if(g_CPUFeatures.x86.movbe && switchEndian ) { x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); 
if( indexed && realRegisterMem != realRegisterData ) @@ -470,7 +465,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p assert_dbg(); if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move) - if(IMLBackendX64_HasExtensionMOVBE()) + if(g_CPUFeatures.x86.movbe) { x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) @@ -521,7 +516,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); uint32 valueRegister; - if ((swapEndian == false || IMLBackendX64_HasExtensionMOVBE()) && realRegisterMem != realRegisterData) + if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData) { valueRegister = realRegisterData; } @@ -530,11 +525,11 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); valueRegister = REG_RESV_TEMP; } - if (!IMLBackendX64_HasExtensionMOVBE() && swapEndian) + if (!g_CPUFeatures.x86.movbe && swapEndian) x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister); if (indexed) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if (IMLBackendX64_HasExtensionMOVBE() && swapEndian) + if (g_CPUFeatures.x86.movbe && swapEndian) x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); else x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); @@ -764,7 +759,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) - if(IMLBackendX64_HasExtensionLZCNT()) + if(g_CPUFeatures.x86.lzcnt) { x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); } @@ -1482,12 +1477,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - if (IMLBackendX64_HasExtensionBMI2() && imlInstruction->operation == PPCREC_IML_OP_SRW) + if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) { // use BMI2 SHRX if available x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - else if (IMLBackendX64_HasExtensionBMI2() && imlInstruction->operation == PPCREC_IML_OP_SLW) + else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW) { // use BMI2 SHLX if available x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); @@ -2632,78 +2627,3 @@ void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions() PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI 
(*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited); } - -bool IMLBackendX64_HasExtensionLZCNT() -{ - return s_hasLZCNTSupport; -} - -bool IMLBackendX64_HasExtensionMOVBE() -{ - return s_hasMOVBESupport; -} - -bool IMLBackendX64_HasExtensionBMI2() -{ - return s_hasBMI2Support; -} - -bool IMLBackendX64_HasExtensionAVX() -{ - return s_hasAVXSupport; -} - -void IMLBackendX64_Init() -{ - // init x64 recompiler instance data - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL; - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL; - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL; - ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL; - ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL; - ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL; - ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL); - ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL; - ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL); - ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL); - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31); - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF; - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF; - ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF; - ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL; - ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL; - ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0; - ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0; - ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0; - ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0; - ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f; - ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f; - ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f; - ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f; - *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000; - *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000; - ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000; - - // mxcsr - ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000; - ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80; - - // query processor extensions - int cpuInfo[4]; - cpuid(cpuInfo, 0x80000001); - s_hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0; - cpuid(cpuInfo, 0x1); - s_hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0; - s_hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0; - cpuidex(cpuInfo, 0x7, 0); - s_hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0; - - forceLog_printf("Recompiler initialized. 
CPU extensions: %s%s%s", s_hasLZCNTSupport ? "LZCNT " : "", s_hasMOVBESupport ? "MOVBE " : "", s_hasAVXSupport ? "AVX " : ""); -} diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 0548f402d..5a2b75000 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -134,13 +134,6 @@ enum #define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register -void IMLBackendX64_Init(); - -bool IMLBackendX64_HasExtensionLZCNT(); -bool IMLBackendX64_HasExtensionMOVBE(); -bool IMLBackendX64_HasExtensionBMI2(); -bool IMLBackendX64_HasExtensionAVX(); - bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index e01b0ac5b..b70a9a319 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -1,6 +1,7 @@ #include "../PPCRecompiler.h" #include "../IML/IML.h" #include "BackendX64.h" +#include "Common/cpu_features.h" #include "asm/x64util.h" // for recompiler_fres / frsqrte @@ -87,7 +88,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, { x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg); - if (IMLBackendX64_HasExtensionMOVBE()) + if (g_CPUFeatures.x86.movbe) { x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); } @@ -99,7 +100,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, } else { - if (IMLBackendX64_HasExtensionMOVBE()) + if (g_CPUFeatures.x86.movbe) { x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); } @@ -109,7 +110,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); } } - if (IMLBackendX64_HasExtensionAVX()) + if (g_CPUFeatures.x86.avx) { x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); } @@ -281,19 +282,19 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio { x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); - if(IMLBackendX64_HasExtensionMOVBE()) + if(g_CPUFeatures.x86.movbe) x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); else x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); } else { - if(IMLBackendX64_HasExtensionMOVBE()) + if(g_CPUFeatures.x86.movbe) x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); else x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, 
imlInstruction->op_storeLoad.immS32); } - if(IMLBackendX64_HasExtensionMOVBE() == false ) + if(g_CPUFeatures.x86.movbe == false ) x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP); @@ -309,7 +310,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio } else if( mode == PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0 ) { - if( IMLBackendX64_HasExtensionAVX() ) + if( g_CPUFeatures.x86.avx ) { if( indexed ) { @@ -413,14 +414,14 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext { x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - if (IMLBackendX64_HasExtensionMOVBE() == false) + if (g_CPUFeatures.x86.movbe == false) x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); if (indexed) { cemu_assert_debug(memReg != memRegEx); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); } - if (IMLBackendX64_HasExtensionMOVBE()) + if (g_CPUFeatures.x86.movbe) x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); else x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); @@ -596,7 +597,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM); x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); } - if(IMLBackendX64_HasExtensionMOVBE() == false ) + if(g_CPUFeatures.x86.movbe == false ) x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); if( indexed ) { @@ -604,7 +605,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti assert_dbg(); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - if(IMLBackendX64_HasExtensionMOVBE()) + if(g_CPUFeatures.x86.movbe) x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); else x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); @@ -1017,7 +1018,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti { x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); } - else if (IMLBackendX64_HasExtensionAVX()) + else if (g_CPUFeatures.x86.avx) { x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA, imlInstruction->op_fpr_r_r_r.registerOperandB); } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 8ec2f545b..2565e3ee5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -567,6 +567,41 @@ void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr) #if defined(ARCH_X86_64) void PPCRecompiler_initPlatform() { + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL; + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL; + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL; + ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL; + 
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL; + ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL; + ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL); + ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL; + ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL); + ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL); + ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31); + ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF; + ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF; + ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF; + ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL; + ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL; + ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0; + ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0; + ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0; + ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0; + ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f; + ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f; + ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f; + ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f; + *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000; + *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000; + ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000; + // mxcsr ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000; ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80; From 8d972d2500b55abf4e6707082284695ef1a273ae Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 12 Dec 2022 14:13:46 +0100 Subject: [PATCH 14/64] PPCRec: Unify BCCTR and BCLR code Instead of having fixed macros for BCCTR/BCCTRL/BCLR/BCLRL we now have only one single macro instruction that takes the jump destination as a register parameter. This also allows us to reuse an already loaded LR register (by something like MTLR) instead of loading it again from memory. 
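For illustration (editor's sketch, condensed from the new PPCRecompilerImlGen_BCSPR added in this patch, not a verbatim excerpt): an unconditional BCLR now loads the SPR into a register and emits the generic branch macro instead of picking one of four fixed macros:

    // branch destination comes from LR (or CTR in the BCCTR case)
    uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR);
    // with LK set, LR is additionally updated to the address of the following instruction beforehand
    ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0);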
As a necessary requirement for this: The register allocator now has support for read operations in suffix instructions --- .../Recompiler/BackendX64/BackendX64.cpp | 28 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 16 +- .../Recompiler/IML/IMLInstruction.cpp | 12 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 14 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 11 +- .../Recompiler/PPCRecompilerImlGen.cpp | 279 ++++-------------- 6 files changed, 92 insertions(+), 268 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 5b3f0830f..af0dffcb9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -119,30 +119,12 @@ void ATTR_MS_ABI PPCRecompiler_getTBU(PPCInterpreter_t* hCPU, uint32 gprIndex) bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BLRL ) + if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) { - uint32 currentInstructionAddress = imlInstruction->op_macro.param; - // MOV EDX, [SPR_LR] - x64Emit_mov_reg64_mem32(x64GenContext, REG_RDX, REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); - // if BLRL, then update SPR LR - if (imlInstruction->operation == PPCREC_IML_MACRO_BLRL) - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), currentInstructionAddress + 4); - // JMP [offset+RDX*(8/4)+R15] - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA4); - x64Gen_writeU8(x64GenContext, 0x57); - x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_BCTR || imlInstruction->operation == PPCREC_IML_MACRO_BCTRL ) - { - uint32 currentInstructionAddress = imlInstruction->op_macro.param; - // MOV EDX, [SPR_CTR] - x64Emit_mov_reg64_mem32(x64GenContext, REG_RDX, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); - // if BCTRL, then update SPR LR - if (imlInstruction->operation == PPCREC_IML_MACRO_BCTRL) - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), currentInstructionAddress + 4); + uint32 branchDstReg = tempToRealRegister(imlInstruction->op_macro.param); + if(REG_RDX != branchDstReg) + x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, branchDstReg); + // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX // JMP [offset+RDX*(8/4)+R15] x64Gen_writeU8(x64GenContext, 0x41); x64Gen_writeU8(x64GenContext, 0xFF); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 560f5de1f..6486f2d0b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -294,21 +294,9 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_MACRO) { - if (inst.operation == PPCREC_IML_MACRO_BLR) + if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) { - strOutput.addFmt("MACRO BLR 0x{:08x} cycles (depr): {}", inst.op_macro.param, 
(sint32)inst.op_macro.paramU16); - } - else if (inst.operation == PPCREC_IML_MACRO_BLRL) - { - strOutput.addFmt("MACRO BLRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); - } - else if (inst.operation == PPCREC_IML_MACRO_BCTR) - { - strOutput.addFmt("MACRO BCTR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); - } - else if (inst.operation == PPCREC_IML_MACRO_BCTRL) - { - strOutput.addFmt("MACRO BCTRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16); + strOutput.addFmt("MACRO B_TO_REG t{}", inst.op_macro.param); } else if (inst.operation == PPCREC_IML_MACRO_BL) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 46ed886d0..449e01c5b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -130,10 +130,14 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_MACRO) { - if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_BLR || operation == PPCREC_IML_MACRO_BLRL || operation == PPCREC_IML_MACRO_BCTR || operation == PPCREC_IML_MACRO_BCTRL || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB) + if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB) { // no effect on registers } + else if (operation == PPCREC_IML_MACRO_B_TO_REG) + { + registersUsed->readNamedReg1 = op_macro.param; + } else cemu_assert_unimplemented(); } @@ -480,10 +484,14 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste } else if (type == PPCREC_IML_TYPE_MACRO) { - if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_BLR || operation == PPCREC_IML_MACRO_BLRL || operation == PPCREC_IML_MACRO_BCTR || operation == PPCREC_IML_MACRO_BCTRL || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB || operation == PPCREC_IML_MACRO_COUNT_CYCLES) + if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB || operation == PPCREC_IML_MACRO_COUNT_CYCLES) { // no effect on registers } + else if (operation == PPCREC_IML_MACRO_B_TO_REG) + { + op_macro.param = replaceRegisterMultiple(op_macro.param, gprRegisterSearched, gprRegisterReplaced); + } else { cemu_assert_unimplemented(); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index c86bb6103..32177e518 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -97,10 +97,8 @@ enum enum { - PPCREC_IML_MACRO_BLR, // macro for BLR instruction code - PPCREC_IML_MACRO_BLRL, // macro for BLRL instruction code - PPCREC_IML_MACRO_BCTR, // macro for BCTR 
instruction code - PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code + PPCREC_IML_MACRO_B_TO_REG, // branch to PPC address in register (used for BCCTR, BCLR) + PPCREC_IML_MACRO_BL, // call to different function (can be within same function) PPCREC_IML_MACRO_B_FAR, // branch to different function PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount @@ -130,7 +128,7 @@ enum { PPCREC_CR_MODE_COMPARE_SIGNED, PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare - // others: PPCREC_CR_MODE_ARITHMETIC, + PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code) PPCREC_CR_MODE_LOGICAL, }; @@ -398,11 +396,9 @@ struct IMLInstruction bool IsSuffixInstruction() const { - if (type == PPCREC_IML_TYPE_MACRO && (operation == PPCREC_IML_MACRO_BLR || operation == PPCREC_IML_MACRO_BCTR) || - type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BL || + if (type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BL || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_FAR || - type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BLRL || - type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BCTRL || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_TO_REG || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index b90aa9b18..6b35239a6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1033,8 +1033,8 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, while (index < imlSegment->imlList.size()) { // end loop at suffix instruction - if (imlSegment->imlList[index].IsSuffixInstruction()) - break; + //if (imlSegment->imlList[index].IsSuffixInstruction()) + // break; // get accessed GPRs imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); for (sint32 t = 0; t < 4; t++) @@ -1125,9 +1125,10 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { - // end loop at suffix instruction - if (imlSegment->imlList[index].IsSuffixInstruction()) - break; + // we parse suffix instructions too for any potential input registers (writes not allowed), but note that any spills/stores need to happen before the suffix instruction + //// end loop at suffix instruction + //if (imlSegment->imlList[index].IsSuffixInstruction()) + // break; // get accessed GPRs imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); // handle accessed GPR diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 6d488b174..41b5ff51c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -797,194 +797,85 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) return true; } -bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// BCCTR or BCLR +bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 sprReg) { 
PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); - + Espresso::BOField BO; + uint32 BI; + bool LK; + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); uint32 crRegister = BI/4; uint32 crBit = BI%4; - uint32 jumpCondition = 0; - - bool conditionMustBeTrue = (BO&8)!=0; - bool useDecrementer = (BO&4)==0; // bit not set -> decrement - bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 - bool ignoreCondition = (BO&16)!=0; - bool saveLR = (opcode&PPC_OPC_LK)!=0; - // since we skip this instruction if the condition is true, we need to invert the logic - //bool invertedConditionMustBeTrue = !conditionMustBeTrue; - if( useDecrementer ) - { - cemu_assert_debug(false); - return false; // unsupported - } - else + uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); + if (LK) { - if( ignoreCondition ) + if (sprReg == SPR_LR) { - // branch always, no condition and no decrementer check - cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); - cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); - if( saveLR ) - { - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } - else - { - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } - } - else - { - cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); - cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); - - //debug_printf("[Rec-Disable] BCLR with condition or LR\n"); - //return false; - - // store LR - if( saveLR ) - { - cemu_assert_unimplemented(); // todo - this is difficult to handle because it needs to jump to the unmodified LR (we should cache it in a register which we pass to the macro?) 
- return false; - - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } - // generate jump condition - if(conditionMustBeTrue) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - - //if(conditionMustBeTrue) - // ppcImlGenContext->emitInst().make_debugbreak(ppcImlGenContext->ppcAddressOfCurrentInstruction); - - // write the BCTR instruction to a new segment that is set as a branch target for the current segment - PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; - IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); - - bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + // if the branch target is LR, then preserve it in a temporary + cemu_assert_suspicious(); // this case needs testing + uint32 tmpRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, nullptr, PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg); + branchDestReg = tmpRegister; } + uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, 0, false, false, PPC_REC_INVALID_REGISTER, 0); } - return true; -} - -bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); - - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); - - uint32 crRegister = BI/4; - uint32 crBit = BI%4; - uint32 jumpCondition = 0; - - bool conditionMustBeTrue = (BO&8)!=0; - bool useDecrementer = (BO&4)==0; // bit not set -> decrement - bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 - bool ignoreCondition = (BO&16)!=0; - bool saveLR = (opcode&PPC_OPC_LK)!=0; - - // since we skip this instruction if the condition is true, we need to invert the logic - bool invertedConditionMustBeTrue = !conditionMustBeTrue; - if( useDecrementer ) + if (!BO.decrementerIgnore()) { - assert_dbg(); - // if added, dont forget inverted logic - debug_printf("Rec: BCLR unsupported decrementer\n"); - return false; // unsupported + cemu_assert_unimplemented(); + return false; } - else + else if (!BO.conditionIgnore()) { - if( ignoreCondition ) + // no decrementer but CR check + 
cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); + // generate jump condition + uint32 jumpCondition = 0; + if (!BO.conditionInverted()) { - // branch always, no condition and no decrementer - if( saveLR ) - { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } - if (saveLR) - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - else - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + // CR bit must be set + if (crBit == 0) + jumpCondition = PPCREC_JUMP_CONDITION_L; + else if (crBit == 1) + jumpCondition = PPCREC_JUMP_CONDITION_G; + else if (crBit == 2) + jumpCondition = PPCREC_JUMP_CONDITION_E; + else if (crBit == 3) + jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } else { - // get jump condition - if (invertedConditionMustBeTrue) - { - if (crBit == 0) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - else - { - if (crBit == 0) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } + if (crBit == 0) + jumpCondition = PPCREC_JUMP_CONDITION_GE; + else if (crBit == 1) + jumpCondition = PPCREC_JUMP_CONDITION_LE; + else if (crBit == 2) + jumpCondition = PPCREC_JUMP_CONDITION_NE; + else if (crBit == 3) + jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; + } - // debug checks - //if (saveLR) - // cemu_assert_debug(ppcImlGenContext->currentBasicBlock->); + // write the dynamic branch instruction to a new segment that is set as a branch target for the current segment + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - // we always store LR - if (saveLR) - { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction + 4) & 0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, !BO.conditionInverted()); - // write the BCTR instruction to a new segment that is set as a branch target for the current segment - __debugbreak(); - PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; - IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - //PPCBasicBlockInfo* bctrSeg = currentBasicBlock->Get - __debugbreak(); + 
bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); + } + else + { + // branch always, no condition and no decrementer check + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); - - // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } } return true; } @@ -3333,8 +3224,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 19: // opcode category 19 switch (PPC_getBits(opcode, 30, 10)) { - case 16: - if (PPCRecompilerImlGen_BCLR(ppcImlGenContext, opcode) == false) + case 16: // BCLR + if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_LR) == false) unsupportedInstructionFound = true; break; case 129: @@ -3365,8 +3256,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_CROR(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 528: - if (PPCRecompilerImlGen_BCCTR(ppcImlGenContext, opcode) == false) + case 528: // BCCTR + if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_CTR) == false) unsupportedInstructionFound = true; break; default: @@ -4008,14 +3899,6 @@ bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& bou case Espresso::PrimaryOpcode::GROUP_19: switch (Espresso::GetGroup19Opcode(opcode)) { - //case Espresso::Opcode19::BCLR: - ////case Espresso::Opcode19::BCCTR: - //{ - // continueDefaultPath = false; // todo - set this to true if this instruction has a condition (including decrementer check) - // makeNextInstEnterable = Espresso::DecodeLK(opcode); - // return true; - //} - case Espresso::Opcode19::BCLR: case Espresso::Opcode19::BCCTR: { @@ -4034,19 +3917,6 @@ bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& bou case Espresso::PrimaryOpcode::GROUP_31: switch (Espresso::GetGroup31Opcode(opcode)) { - //case Espresso::Opcode31::TW: - // continueDefaultPath = true; - // return true; - //case Espresso::Opcode31::MFTB: - // continueDefaultPath = true; - // return true; - //case Espresso::Opcode19::BCLR: - //case Espresso::Opcode19::BCCTR: - //{ - // continueDefaultPath = false; - // makeNextInstEnterable = Espresso::DecodeLK(opcode); - // return true; - //} default: break; } @@ -4336,7 +4206,6 @@ void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { bool isLastSegment = segIt == ppcImlGenContext.segmentList2.back(); - //IMLSegment* nextSegment = isLastSegment ? 
nullptr : ppcImlGenContext->segmentList2[s + 1]; // handle empty segment if (segIt->imlList.empty()) { @@ -4352,29 +4221,13 @@ void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext { cemu_assert_debug(segIt->GetBranchNotTaken()); } - - //// find destination segment by ppc jump address - //IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); - //if (jumpDestSegment) - //{ - // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - // IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); - //} - //else - //{ - // imlSegment->nextSegmentIsUncertain = true; - //} } else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) { auto macroType = imlInstruction->operation; switch (macroType) { - case PPCREC_IML_MACRO_BLR: - case PPCREC_IML_MACRO_BLRL: - case PPCREC_IML_MACRO_BCTR: - case PPCREC_IML_MACRO_BCTRL: + case PPCREC_IML_MACRO_B_TO_REG: case PPCREC_IML_MACRO_BL: case PPCREC_IML_MACRO_B_FAR: case PPCREC_IML_MACRO_HLE: @@ -4500,7 +4353,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction { if (seg->imlList[f].IsSuffixInstruction()) { - debug_printf("---------------- SegmentDump (Suffix instruction at wrong pos in segment 0x%x):\n", segIndex); + debug_printf("---------------- SegmentDump (Suffix instruction at wrong pos in segment 0x%x):\n", (int)segIndex); IMLDebug_Dump(&ppcImlGenContext); __debugbreak(); } @@ -4510,7 +4363,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction { if (!seg->HasSuffixInstruction()) { - debug_printf("---------------- SegmentDump (NoSuffixInstruction in segment 0x%x):\n", segIndex); + debug_printf("---------------- SegmentDump (NoSuffixInstruction in segment 0x%x):\n", (int)segIndex); IMLDebug_Dump(&ppcImlGenContext); __debugbreak(); } @@ -4540,7 +4393,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction { if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) { - debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", segIndex); + debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", (int)segIndex); IMLDebug_Dump(&ppcImlGenContext); cemu_assert_error(); } @@ -4551,10 +4404,6 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction } } } - //if (seg->list_prevSegments.empty()) - //{ - // cemu_assert_debug(seg->isEnterable); - //} segIndex++; } #endif From 874e376361edcc38d40aae14e42dc2e43511a623 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 12 Dec 2022 15:33:01 +0100 Subject: [PATCH 15/64] PPCRec: Fix single segment loop not being detected Also removed associatedPPCAddress field from IMLInstruction as it's no longer used --- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 6 +- .../Recompiler/PPCRecompilerImlGen.cpp | 105 +----------------- 2 files changed, 5 insertions(+), 106 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 6486f2d0b..675329a6f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -152,11 +152,11 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool for (sint32 i = 0; i < imlSegment->imlList.size(); i++) { const IMLInstruction& 
inst = imlSegment->imlList[i]; - // don't log NOP instructions unless they have an associated PPC address - if (inst.type == PPCREC_IML_TYPE_NO_OP && inst.associatedPPCAddress == MPTR_NULL) + // don't log NOP instructions + if (inst.type == PPCREC_IML_TYPE_NO_OP) continue; strOutput.reset(); - strOutput.addFmt("{:08x} ", inst.associatedPPCAddress); + strOutput.addFmt("{:02x} ", i); if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) { if (inst.type == PPCREC_IML_TYPE_R_NAME) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 41b5ff51c..6e927fc46 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -4029,7 +4029,6 @@ IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenConte { IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); - //IMLSegment* continuedSegment = ppcImlGenContext.NewSegment(); IMLSegment* continuedSegment = ppcImlGenContext.InsertSegment(ppcImlGenContext.GetSegmentIndex(writeSegment) + 1); continuedSegment->SetLinkBranchTaken(writeSegment->GetBranchTaken()); @@ -4066,15 +4065,9 @@ void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenCo void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) { IMLSegment* imlSegment = basicBlockInfo.GetFirstSegmentInChain(); - //if (imlSegment->imlList.empty()) - // return; - //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) - // return; - //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) - // return; if (!basicBlockInfo.hasBranchTarget) return; - if (basicBlockInfo.branchTarget >= basicBlockInfo.startAddress) + if (basicBlockInfo.branchTarget > basicBlockInfo.startAddress) return; // exclude non-infinite tight loops @@ -4089,116 +4082,22 @@ void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, P // The first segment also retains the jump destination and enterable properties from the original segment. //debug_printf("--- Insert cycle counter check ---\n"); - // make the segment enterable so execution can return after checking basicBlockInfo.GetFirstSegmentInChain()->SetEnterable(basicBlockInfo.startAddress); IMLSegment* splitSeg = PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext, basicBlockInfo); - // what we know about the crash: - // It doesnt happen with cycle checks disabled - // The debugbreak emitted here is only encountered twice before it crashes - // it doesnt seem to go into the alternative branch (cycles negative) -> tested (debugbreak in exit segment doesnt trigger) - // Its the enterable segment that causes issues? -> I removed the enterable statement and it still happened - // Maybe some general issue with getting x64 offsets for enterable segments.. 
- - // possible explanations: - // issue with the cycle check / exit logic - // returning from exit is causing the issue - // Segments can get marked as jump destination which we no longer do -> Deleted old code and added asserts - IMLInstruction* inst = splitSeg->AppendInstruction(); inst->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; inst->operation = 0; inst->crRegister = PPC_REC_INVALID_REGISTER; inst->op_conditionalJump.jumpmarkAddress = 0xFFFFFFFF; inst->associatedPPCAddress = 0xFFFFFFFF; - // PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK - - //splitSeg->AppendInstruction()->make_macro(PPCREC_IML_TYPE_MACRO, ) IMLSegment* exitSegment = ppcImlGenContext.NewSegment(); splitSeg->SetLinkBranchTaken(exitSegment); - - //exitSegment->AppendInstruction()->make_debugbreak(); - - inst = exitSegment->AppendInstruction();// ->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress); - inst->type = PPCREC_IML_TYPE_MACRO; - inst->operation = PPCREC_IML_MACRO_LEAVE; - inst->crRegister = PPC_REC_INVALID_REGISTER; - inst->op_macro.param = basicBlockInfo.startAddress; - inst->associatedPPCAddress = basicBlockInfo.startAddress; - - - //debug_printf("----------------------------------------\n"); - //IMLDebug_Dump(&ppcImlGenContext); - //__debugbreak(); - - //ppcImlGenContext.NewSegment(); - - //PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); - //imlSegment = NULL; - //IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; - //IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; - //IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; - //// create entry point segment - //PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); - //IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; - //// relink segments - //IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); - //IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - //IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); - //IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); - //// update segments - //uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; - //if (imlSegmentP2->isEnterable) - // enterPPCAddress = imlSegmentP2->enterPPCAddress; - //imlSegmentP0->ppcAddress = 0xFFFFFFFF; - //imlSegmentP1->ppcAddress = 0xFFFFFFFF; - //imlSegmentP2->ppcAddress = 0xFFFFFFFF; - //cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); - //// move segment properties from segment P2 to segment P0 - //imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; - //imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; - //imlSegmentP0->isEnterable = false; - ////imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; - //imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; - //imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; - //imlSegmentP2->isJumpDestination = false; - //imlSegmentP2->jumpDestinationPPCAddress = 0; - //imlSegmentP2->isEnterable = false; - //imlSegmentP2->enterPPCAddress = 0; - //imlSegmentP2->ppcAddrMin = 0; - //imlSegmentP2->ppcAddrMax = 0; - //// setup enterable segment - //if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) - //{ - // imlSegmentPEntry->isEnterable = true; - // imlSegmentPEntry->ppcAddress = enterPPCAddress; - // imlSegmentPEntry->enterPPCAddress = enterPPCAddress; - //} - //// assign new jumpmark to segment P2 - //imlSegmentP2->isJumpDestination = true; - //imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; - 
//currentLoopEscapeJumpMarker++; - //// create ppc_leave instruction in segment P1 - //PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); - //imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; - //imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; - //imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - //imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; - //imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - //// create cycle-based conditional instruction in segment P0 - //PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); - //imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - //imlSegmentP0->imlList[0].operation = 0; - //imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - //imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; - //imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - //// jump instruction for PEntry - //PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); - //PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); + exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0); } void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext) From 93f56159a152ede00d760a1c05bb5b6e2f08dd82 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 12 Dec 2022 15:44:50 +0100 Subject: [PATCH 16/64] PPCRec: Remove now unused PPC_ENTER and jumpMarkAddress --- .../Recompiler/BackendX64/BackendX64.cpp | 14 +--- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 6 +- .../Recompiler/IML/IMLInstruction.cpp | 16 ----- .../Espresso/Recompiler/IML/IMLInstruction.h | 13 +--- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 1 - .../Recompiler/PPCRecompilerImlGen.cpp | 68 +++++++------------ 6 files changed, 32 insertions(+), 86 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index af0dffcb9..44a2c78ec 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -1964,8 +1964,8 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { // deprecated (jump to jumpmark) __debugbreak(); // deprecated - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmp_imm32(x64GenContext, 0); + //PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + //x64Gen_jmp_imm32(x64GenContext, 0); } } else @@ -2135,12 +2135,6 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc return false; } - -void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - imlInstruction->op_ppcEnter.x64Offset = x64GenContext->codeBufferIndex; -} - void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; @@ -2346,10 +2340,6 @@ bool 
PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { // no op } - else if( imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER ) - { - PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) { PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 675329a6f..d52192d8e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -240,10 +240,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool { strOutput.addFmt("jm_{:08x}:", inst.op_jumpmark.address); } - else if (inst.type == PPCREC_IML_TYPE_PPC_ENTER) - { - strOutput.addFmt("ppcEnter_{:08x}:", inst.op_ppcEnter.ppcAddress); - } else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) { @@ -286,7 +282,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.add("JALW"); // jump always else cemu_assert_unimplemented(); - strOutput.addFmt(" jm_{:08x} (cr{})", inst.op_conditionalJump.jumpmarkAddress, inst.crRegister); + strOutput.addFmt(" (cr{})", inst.crRegister); } else if (inst.type == PPCREC_IML_TYPE_NO_OP) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 449e01c5b..cd5c1ade1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -177,10 +177,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { // no effect on registers } - else if (type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { // fpr operation @@ -535,10 +531,6 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste { // no effect on registers } - else if (type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -679,10 +671,6 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // no effect on registers } - else if (type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); @@ -797,10 +785,6 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // no effect on registers } - else if (type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { op_r_name.registerIndex = replaceRegister(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 32177e518..a883bf919 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -151,7 +151,6 @@ enum PPCREC_IML_TYPE_MACRO, PPCREC_IML_TYPE_CJUMP, // conditional jump PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 - PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be 
written to recompilerCallTable PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) // conditional PPCREC_IML_TYPE_CONDITIONAL_R_S32, @@ -270,7 +269,6 @@ struct IMLInstruction uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr. uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated - uint32 associatedPPCAddress; // ppc address that is associated with this instruction union { struct @@ -322,7 +320,6 @@ struct IMLInstruction }op_macro; struct { - uint32 jumpmarkAddress; bool jumpAccordingToSegment; //IMLSegment* destinationSegment; // if set, this replaces jumpmarkAddress uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup) uint8 crRegisterIndex; @@ -402,7 +399,6 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || - type == PPCREC_IML_TYPE_PPC_ENTER || type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) return true; @@ -430,14 +426,11 @@ struct IMLInstruction op_macro.paramU16 = paramU16; } - void make_ppcEnter(uint32 ppcAddress) + void make_cjump_cycle_check() { - cemu_assert_suspicious(); // removed - type = PPCREC_IML_TYPE_PPC_ENTER; + type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; operation = 0; - op_ppcEnter.ppcAddress = ppcAddress; - op_ppcEnter.x64Offset = 0; - associatedPPCAddress = 0; + crRegister = PPC_REC_INVALID_REGISTER; } void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index cd0f07c07..3ded8c790 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -123,7 +123,6 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte { // convert to NO-OP instruction imlInstructionItr.type = PPCREC_IML_TYPE_NO_OP; - imlInstructionItr.associatedPPCAddress = 0; } } imlIndex++; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 6e927fc46..652a7b9e7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -157,19 +157,19 @@ void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlG { __debugbreak(); - // jump - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - else - memset(imlInstruction, 0, sizeof(IMLInstruction)); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; - imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - imlInstruction->op_conditionalJump.crRegisterIndex = 0; - imlInstruction->op_conditionalJump.crBitIndex = 0; - imlInstruction->op_conditionalJump.bitMustBeSet = false; + //// jump + //if (imlInstruction == NULL) + // imlInstruction = 
PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + //else + // memset(imlInstruction, 0, sizeof(IMLInstruction)); + //imlInstruction->type = PPCREC_IML_TYPE_CJUMP; + //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + //imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; + //imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; + //imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; + //imlInstruction->op_conditionalJump.crRegisterIndex = 0; + //imlInstruction->op_conditionalJump.crBitIndex = 0; + //imlInstruction->op_conditionalJump.bitMustBeSet = false; } // jump based on segment branches @@ -178,10 +178,8 @@ void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* // jump if (imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->associatedPPCAddress = 0; imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpmarkAddress = 0; imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; imlInstruction->op_conditionalJump.crRegisterIndex = 0; @@ -218,16 +216,16 @@ void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext { __debugbreak(); - // conditional jump - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; - imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - imlInstruction->op_conditionalJump.condition = jumpCondition; - imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; + //// conditional jump + //IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + //imlInstruction->type = PPCREC_IML_TYPE_CJUMP; + //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + //imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; + //imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; + //imlInstruction->op_conditionalJump.condition = jumpCondition; + //imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; + //imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; + //imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; } void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) @@ -4073,26 +4071,12 @@ void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, P // exclude non-infinite tight loops if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) return; - // potential loop segment found, split this segment into four: - // P0: This segment checks if the remaining cycles counter is still above zero. 
If yes, it jumps to segment P2 (it's also the jump destination for other segments) - // P1: This segment contains the ppc_leave instruction - // P2: This segment contains the iml instructions of the original segment - // PEntry: This segment is used to enter the function, it jumps to P0 - // All segments are considered to be part of the same PPC instruction range - // The first segment also retains the jump destination and enterable properties from the original segment. - //debug_printf("--- Insert cycle counter check ---\n"); - - // make the segment enterable so execution can return after checking + + // make the segment enterable so execution can return after passing a check basicBlockInfo.GetFirstSegmentInChain()->SetEnterable(basicBlockInfo.startAddress); IMLSegment* splitSeg = PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext, basicBlockInfo); - - IMLInstruction* inst = splitSeg->AppendInstruction(); - inst->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - inst->operation = 0; - inst->crRegister = PPC_REC_INVALID_REGISTER; - inst->op_conditionalJump.jumpmarkAddress = 0xFFFFFFFF; - inst->associatedPPCAddress = 0xFFFFFFFF; + splitSeg->AppendInstruction()->make_cjump_cycle_check(); IMLSegment* exitSegment = ppcImlGenContext.NewSegment(); splitSeg->SetLinkBranchTaken(exitSegment); From 9dc820795fa344ad82852e16088d0c0d4c651e33 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 12 Dec 2022 16:55:30 +0100 Subject: [PATCH 17/64] PPCRec: Clean up unused flags --- .../Recompiler/BackendX64/BackendX64.cpp | 4 - .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 4 - .../Recompiler/IML/IMLInstruction.cpp | 16 -- .../Espresso/Recompiler/IML/IMLInstruction.h | 16 +- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 12 +- .../Recompiler/PPCRecompilerImlGen.cpp | 266 +----------------- 6 files changed, 13 insertions(+), 305 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 44a2c78ec..008ad1d13 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -2332,10 +2332,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK ) - { - // no op - } else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) { // no op diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index d52192d8e..008c2fadb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -236,10 +236,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt(" -> CR{}", inst.crRegister); } } - else if (inst.type == PPCREC_IML_TYPE_JUMPMARK) - { - strOutput.addFmt("jm_{:08x}:", inst.op_jumpmark.address); - } else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index cd5c1ade1..f471c827a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -173,10 +173,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { // 
only affects cr register } - else if (type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { // fpr operation @@ -527,10 +523,6 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste { // only affects cr register } - else if (type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -667,10 +659,6 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // only affects cr register } - else if (type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); @@ -781,10 +769,6 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // only affects cr register } - else if (type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { op_r_name.registerIndex = replaceRegister(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index a883bf919..871e7966b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -1,11 +1,5 @@ #pragma once -#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0) -#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1) -#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set single-precision load instructions to indicate that the value should not be rounded to double-precision -#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used - - enum { PPCREC_IML_OP_ASSIGN, // '=' operator @@ -137,7 +131,6 @@ enum { PPCREC_IML_TYPE_NONE, PPCREC_IML_TYPE_NO_OP, // no-op instruction - PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each ppc instruction) PPCREC_IML_TYPE_R_R, // r* (op) *r PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* @@ -175,7 +168,6 @@ enum PPCREC_NAME_SPR0 = 2000, PPCREC_NAME_FPR0 = 3000, PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7 - //PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away) }; // special cases for LOAD/STORE @@ -406,10 +398,12 @@ struct IMLInstruction } // instruction setters - void make_jumpmark(uint32 address) + void make_no_op() { - type = PPCREC_IML_TYPE_JUMPMARK; - op_jumpmark.address = address; + type = PPCREC_IML_TYPE_NO_OP; + operation = 0; + crRegister = PPC_REC_INVALID_REGISTER; + crMode = 0; } void make_debugbreak(uint32 currentPPCAddress = 0) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 3ded8c790..a0b2a5179 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -121,8 +121,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte { if( imlInstructionItr.op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) { - // convert to NO-OP instruction - imlInstructionItr.type = PPCREC_IML_TYPE_NO_OP; + imlInstructionItr.make_no_op(); } } imlIndex++; @@ -191,7 +190,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // name_unusedRegister = unusedRegister 
IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if( replacedRegisterIsUsed ) + if (replacedRegisterIsUsed) { imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -199,7 +198,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; } else - imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; + imlInstructionItr->make_no_op(); imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; @@ -216,7 +215,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // unusedRegister = name_unusedRegister imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if( replacedRegisterIsUsed ) + if (replacedRegisterIsUsed) { imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -224,7 +223,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; } else - imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; + imlInstructionItr->make_no_op(); } else break; @@ -1156,7 +1155,6 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment) (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) continue; // not safe - //hasUnsafeInstructions = true; if (unsafeInstructionIndex == -1) unsafeInstructionIndex = i; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 652a7b9e7..e3c818a6e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -191,10 +191,7 @@ void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlG { if (imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_NO_OP; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->crMode = 0; + imlInstruction->make_no_op(); } void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB) @@ -252,7 +249,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppc imlInstruction->op_storeLoad.registerMem = registerMemory; imlInstruction->op_storeLoad.immS32 = immS32; imlInstruction->op_storeLoad.copyWidth = copyWidth; - //imlInstruction->op_storeLoad.flags = (signExtend ? PPCREC_IML_OP_FLAG_SIGNEXTEND : 0) | (switchEndian ? 
PPCREC_IML_OP_FLAG_SWITCHENDIAN : 0); imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } @@ -268,7 +264,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; imlInstruction->op_storeLoad.copyWidth = copyWidth; - //imlInstruction->op_storeLoad.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } @@ -284,7 +279,6 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppc imlInstruction->op_storeLoad.registerMem = registerMemory; imlInstruction->op_storeLoad.immS32 = immS32; imlInstruction->op_storeLoad.copyWidth = copyWidth; - //imlInstruction->op_storeLoad.flags = (switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; imlInstruction->op_storeLoad.flags2.signExtend = false; } @@ -300,7 +294,6 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; imlInstruction->op_storeLoad.copyWidth = copyWidth; - //imlInstruction->op_storeLoad.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } @@ -4306,265 +4299,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) return false; - // add entire range + // set range + // todo - support non-continuous functions for the range tracking? 
ppcRecRange_t recRange; recRange.ppcAddress = ppcRecFunc->ppcAddress; recRange.ppcSize = ppcRecFunc->ppcSize; ppcRecFunc->list_ranges.push_back(recRange); - // process ppc instructions -// ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); -// bool unsupportedInstructionFound = false; -// sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; -// sint32 unsupportedInstructionCount = 0; -// uint32 unsupportedInstructionLastOffset = 0; -// uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; -// uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; -// -// while(ppcImlGenContext.currentInstruction < endCurrentInstruction) -// { -// uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); -// ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; -// ppcImlGenContext.cyclesSinceLastBranch++; -// ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); -// if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) -// { -// // add PPCEnter for addresses that are in entryAddresses -// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); -// } -// else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) -// { -// // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional jump instructions without jump goal) -// uint32 opcodeCurrent = PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); -// uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); -// if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) -// { -// // between two B(L) instructions -// // todo: for BL only if they are not inlineable -// -// bool canInlineFunction = false; -// if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) -// { -// uint32 li; -// PPC_OPC_TEMPL_I(opcodePrevious, li); -// sint32 inlineSize = 0; -// if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) -// canInlineFunction = true; -// } -// if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) -// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); -// } -// if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) -// { -// uint32 BO, BI, BD; -// PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); -// if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) -// { -// // after unconditional BCTR instruction -// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); -// } -// } -// } -// -// unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); -// if( unsupportedInstructionFound ) -// { -// unsupportedInstructionCount++; -// unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; -// unsupportedInstructionFound = false; -// //break; -// } -// } -// ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) -// if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) -// { -// debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); -// return false; -// } -// // optimize unused jumpmarks away -// // first, flag all jumpmarks as unused -// std::map map_jumpMarks; -// for(sint32 i=0; 
isecond->op_jumpmark.flags &= ~PPCREC_IML_OP_FLAG_UNUSED; -// } -// } -// // lastly, remove jumpmarks that still have the unused flag set -// sint32 currentImlIndex = 0; -// for(sint32 i=0; i end of segment after current instruction -// // If we encounter a jumpmark -> end of segment before current instruction -// // If we encounter ppc_enter -> end of segment before current instruction -// if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || -// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || -// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || -// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || -// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || -// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || -// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) -// { -// // segment ends after current instruction -// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); -// ppcRecSegment->startOffset = segmentStart; -// ppcRecSegment->count = segmentImlIndex-segmentStart+1; -// ppcRecSegment->ppcAddress = 0xFFFFFFFF; -// segmentStart = segmentImlIndex+1; -// } -// else if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || -// ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) -// { -// // segment ends before current instruction -// if( segmentImlIndex > segmentStart ) -// { -// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); -// ppcRecSegment->startOffset = segmentStart; -// ppcRecSegment->count = segmentImlIndex-segmentStart; -// ppcRecSegment->ppcAddress = 0xFFFFFFFF; -// segmentStart = segmentImlIndex; -// } -// } -// segmentImlIndex++; -// } -// if( segmentImlIndex != segmentStart ) -// { -// // final segment -// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); -// ppcRecSegment->startOffset = segmentStart; -// ppcRecSegment->count = segmentImlIndex-segmentStart; -// ppcRecSegment->ppcAddress = 0xFFFFFFFF; -// segmentStart = segmentImlIndex; -// } -// // move iml instructions into the segments -// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) -// { -// uint32 imlStartIndex = segIt->startOffset; -// uint32 imlCount = segIt->count; -// if( imlCount > 0 ) -// { -// cemu_assert_debug(segIt->imlList.empty()); -// segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); -// -// } -// else -// { -// // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code -// cemu_assert_debug(segIt->imlList.empty()); -// } -// 
segIt->startOffset = 9999999; -// segIt->count = 9999999; -// } -// // clear segment-independent iml list -// free(ppcImlGenContext.imlList); -// ppcImlGenContext.imlList = nullptr; -// ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList -// // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) -// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) -// { -// uint32 segmentPPCAddrMin = 0xFFFFFFFF; -// uint32 segmentPPCAddrMax = 0x00000000; -// for(sint32 i=0; i< segIt->imlList.size(); i++) -// { -// if(segIt->imlList[i].associatedPPCAddress == 0 ) -// continue; -// //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) -// // continue; // jumpmarks and no-op instructions must not affect segment ppc address range -// segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); -// segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); -// } -// if( segmentPPCAddrMin != 0xFFFFFFFF ) -// { -// segIt->ppcAddrMin = segmentPPCAddrMin; -// segIt->ppcAddrMax = segmentPPCAddrMax; -// } -// else -// { -// segIt->ppcAddrMin = 0; -// segIt->ppcAddrMax = 0; -// } -// } -// // certain instructions can change the segment state -// // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) -// // jumpmarks mark the segment as a jump destination (within the same function) -// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) -// { -// while (segIt->imlList.size() > 0) -// { -// if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) -// { -// // mark segment as enterable -// if (segIt->isEnterable) -// assert_dbg(); // should not happen? -// segIt->isEnterable = true; -// segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; -// // remove ppc_enter instruction -// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; -// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; -// segIt->imlList[0].associatedPPCAddress = 0; -// } -// else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) -// { -// // mark segment as jump destination -// if(segIt->isJumpDestination ) -// assert_dbg(); // should not happen? 
-// segIt->isJumpDestination = true;
-// segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address;
-// // remove jumpmark instruction
-// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP;
-// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
-// segIt->imlList[0].associatedPPCAddress = 0;
-// }
-// else
-// break;
-// }
-// }
-// // the first segment is always enterable as the recompiled functions entrypoint
-// ppcImlGenContext.segmentList2[0]->isEnterable = true;
-// ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress;
-//
-// // link segments for further inter-segment optimization
-// PPCRecompilerIML_linkSegments(&ppcImlGenContext);
 	// optimization pass - replace segments with conditional MOVs if possible
 	for (IMLSegment* segIt : ppcImlGenContext.segmentList2)

From d308252177b46a5bb3265b79c3296e99f366efbb Mon Sep 17 00:00:00 2001
From: Exzap <13877693+Exzap@users.noreply.github.com>
Date: Tue, 13 Dec 2022 05:41:26 +0100
Subject: [PATCH 18/64] PPCRec: Make LSWI/STSWI more generic + GPR temporaries storage

---
 .../Recompiler/BackendX64/BackendX64.cpp | 91 +-----
 .../Espresso/Recompiler/IML/IMLInstruction.h | 99 ++++--
 .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 2 +-
 .../Recompiler/IML/IMLRegisterAllocator.cpp | 2 +-
 .../HW/Espresso/Recompiler/PPCRecompiler.h | 1 -
 .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 3 -
 .../Recompiler/PPCRecompilerImlGen.cpp | 282 +++++-------------
 7 files changed, 161 insertions(+), 319 deletions(-)

diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
index 008ad1d13..9ac9f23ff 100644
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
@@ -415,7 +415,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
 	// todo: Optimize by using only MOVZX/MOVSX
 	if( indexed )
 		x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
-	// todo: Use sign extend move from memory instead of separate sign-extend?
if( signExtend ) x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); else @@ -440,28 +439,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p // LWARX instruction costs extra cycles (this speeds up busy loops) x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( switchEndian == false ) - assert_dbg(); - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move) - if(g_CPUFeatures.x86.movbe) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } - x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00); - } else return false; return true; @@ -599,36 +576,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes .. - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. 
as big-endian - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP); - - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } else return false; return true; @@ -1943,40 +1890,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { - if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_conditionalJump(): Failed on deprecated jump method\n"); - return false; - } - if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) { // jump always - if (imlInstruction->op_conditionalJump.jumpAccordingToSegment) - { - // jump to segment - if (imlSegment->nextSegmentBranchTaken == nullptr) - assert_dbg(); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); - x64Gen_jmp_imm32(x64GenContext, 0); - } - else - { - // deprecated (jump to jumpmark) - __debugbreak(); // deprecated - //PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - //x64Gen_jmp_imm32(x64GenContext, 0); - } + cemu_assert_debug(imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + x64Gen_jmp_imm32(x64GenContext, 0); } else { - if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) - { - debug_printf("Unsupported deprecated cjump to ppc address\n"); - return false; - } cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - // generate jump update marker if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) { @@ -2159,6 +2082,10 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, else assert_dbg(); } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + 
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + } else assert_dbg(); } @@ -2187,6 +2114,10 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, else assert_dbg(); } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + } else assert_dbg(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 871e7966b..89f14af42 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -163,24 +163,18 @@ enum enum { PPCREC_NAME_NONE, - PPCREC_NAME_TEMPORARY, - PPCREC_NAME_R0 = 1000, - PPCREC_NAME_SPR0 = 2000, - PPCREC_NAME_FPR0 = 3000, - PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7 + PPCREC_NAME_TEMPORARY = 1000, + PPCREC_NAME_R0 = 2000, + PPCREC_NAME_SPR0 = 3000, + PPCREC_NAME_FPR0 = 4000, + PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 }; // special cases for LOAD/STORE #define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) #define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) -#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1 -#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2 -#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3 -#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1 -#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2 -#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3 -#define PPC_REC_INVALID_REGISTER 0xFF +#define PPC_REC_INVALID_REGISTER 0xFF #define PPCREC_CR_BIT_LT 0 #define PPCREC_CR_BIT_GT 1 @@ -312,7 +306,6 @@ struct IMLInstruction }op_macro; struct { - bool jumpAccordingToSegment; //IMLSegment* destinationSegment; // if set, this replaces jumpmarkAddress uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup) uint8 crRegisterIndex; uint8 crBitIndex; @@ -413,18 +406,82 @@ struct IMLInstruction void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) { - type = PPCREC_IML_TYPE_MACRO; - operation = macroId; - op_macro.param = param; - op_macro.param2 = param2; - op_macro.paramU16 = paramU16; + this->type = PPCREC_IML_TYPE_MACRO; + this->operation = macroId; + this->op_macro.param = param; + this->op_macro.param2 = param2; + this->op_macro.paramU16 = paramU16; } void make_cjump_cycle_check() { - type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - operation = 0; - crRegister = PPC_REC_INVALID_REGISTER; + this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + this->operation = 0; + this->crRegister = PPC_REC_INVALID_REGISTER; + } + + + void make_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + { + // operation with two register operands (e.g. 
"t0 = t1") + this->type = PPCREC_IML_TYPE_R_R; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_r.registerResult = registerResult; + this->op_r_r.registerA = registerA; + } + + void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + { + // operation with three register operands (e.g. "t0 = t1 + t4") + this->type = PPCREC_IML_TYPE_R_R_R; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_r_r.registerResult = registerResult; + this->op_r_r_r.registerA = registerA; + this->op_r_r_r.registerB = registerB; + } + + void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + { + // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234") + this->type = PPCREC_IML_TYPE_R_R_S32; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_r_s32.registerResult = registerResult; + this->op_r_r_s32.registerA = registerA; + this->op_r_r_s32.immS32 = immS32; + } + + // load from memory + void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_LOAD; + this->operation = 0; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->op_storeLoad.registerData = registerDestination; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.copyWidth = copyWidth; + this->op_storeLoad.flags2.swapEndian = switchEndian; + this->op_storeLoad.flags2.signExtend = signExtend; + } + + // store to memory + void make_memory_r(uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_STORE; + this->operation = 0; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->op_storeLoad.registerData = registerSource; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.copyWidth = copyWidth; + this->op_storeLoad.flags2.swapEndian = switchEndian; + this->op_storeLoad.flags2.signExtend = false; } void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index a0b2a5179..a09d4bab1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -1211,7 +1211,7 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment) #endif IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); - PPCRecompilerImlGen_generateNewInstruction_noOp(nullptr, imlSegment->imlList.data() + crSetterInstructionIndex); + imlSegment->imlList[crSetterInstructionIndex].make_no_op(); } /* diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 6b35239a6..1dd1f7ba3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1203,7 +1203,7 @@ void 
_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLS { if (routeDepth >= 64) { - forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress); + forceLogDebug_printf("Recompiler RA route maximum depth exceeded\n"); return; } route[routeDepth] = currentSegment; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 10cd0aa02..bd2c02d8f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -32,7 +32,6 @@ struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct pp struct ppcImlGenContext_t { class PPCFunctionBoundaryTracker* boundaryTracker; - PPCRecFunction_t* functionRef; uint32* currentInstruction; uint32 ppcAddressOfCurrentInstruction; IMLSegment* currentOutputSegment; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 0521c440b..4843fd1cc 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -26,7 +26,6 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress); void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode); @@ -37,8 +36,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe // IML instruction generation (new style, can generate new instructions but also overwrite existing ones) -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); - void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); // IML generation - FPU diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index e3c818a6e..40844bb22 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -50,24 +50,9 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) { - //if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) - //{ - // sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; - // ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); - // ppcImlGenContext->imlListSize = newSize; - //} - //IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; - //memset(imlInstruction, 0x00, sizeof(IMLInstruction)); - //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by 
default - //imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; - //ppcImlGenContext->imlListCount++; - //return imlInstruction; - IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); memset(&inst, 0x00, sizeof(IMLInstruction)); inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default -//imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; - return &inst; } @@ -86,28 +71,12 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) { - // operation with three register operands (e.g. "t0 = t1 + t4") - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R_R; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r_r.registerResult = registerResult; - imlInstruction->op_r_r_r.registerA = registerA; - imlInstruction->op_r_r_r.registerB = registerB; + ppcImlGenContext->emitInst().make_r_r_r(operation, registerResult, registerA, registerB, crRegister, crMode); } void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) { - // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234") - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R_S32; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r_s32.registerResult = registerResult; - imlInstruction->op_r_r_s32.registerA = registerA; - imlInstruction->op_r_r_s32.immS32 = immS32; + ppcImlGenContext->emitInst().make_r_r_s32(operation, registerResult, registerA, immS32, crRegister, crMode); } void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name) @@ -153,25 +122,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte } -void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress) -{ - __debugbreak(); - - //// jump - //if (imlInstruction == NULL) - // imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - //else - // memset(imlInstruction, 0, sizeof(IMLInstruction)); - //imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - //imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - //imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; - //imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - //imlInstruction->op_conditionalJump.crRegisterIndex = 0; - //imlInstruction->op_conditionalJump.crBitIndex = 0; - //imlInstruction->op_conditionalJump.bitMustBeSet = false; -} - // jump based on segment branches void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* 
ppcImlGenContext, IMLInstruction* imlInstruction) { @@ -180,18 +130,22 @@ void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; imlInstruction->op_conditionalJump.crRegisterIndex = 0; imlInstruction->op_conditionalJump.crBitIndex = 0; imlInstruction->op_conditionalJump.bitMustBeSet = false; } -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) +void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->make_no_op(); + // conditional jump + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + imlInstruction->type = PPCREC_IML_TYPE_CJUMP; + imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.condition = jumpCondition; + imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; + imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; + imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; } void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB) @@ -209,48 +163,9 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen imlInstruction->op_cr.crB = crB; } -void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - __debugbreak(); - - //// conditional jump - //IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - //imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - //imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; - //imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - //imlInstruction->op_conditionalJump.condition = jumpCondition; - //imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - //imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - //imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - -void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - // conditional jump - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; - imlInstruction->op_conditionalJump.condition = jumpCondition; - imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - void 
PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { - // load from memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_LOAD; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_storeLoad.registerData = registerDestination; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.copyWidth = copyWidth; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; - imlInstruction->op_storeLoad.flags2.signExtend = signExtend; + ppcImlGenContext->emitInst().make_r_memory(registerDestination, registerMemory, immS32, copyWidth, signExtend, switchEndian); } void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) @@ -270,17 +185,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) { - // load from memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_STORE; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_storeLoad.registerData = registerSource; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.copyWidth = copyWidth; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; - imlInstruction->op_storeLoad.flags2.signExtend = false; + ppcImlGenContext->emitInst().make_memory_r(registerSource, registerMemory, immS32, copyWidth, switchEndian); } void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) @@ -630,10 +535,11 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin } } // add range - ppcRecRange_t recRange; - recRange.ppcAddress = startAddress; - recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR - ppcImlGenContext->functionRef->list_ranges.push_back(recRange); + cemu_assert_unimplemented(); + //ppcRecRange_t recRange; + //recRange.ppcAddress = startAddress; + //recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR + //ppcImlGenContext->functionRef->list_ranges.push_back(recRange); } bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -653,14 +559,9 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) } // is jump destination within recompiled function? 
if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) - { - // jump to target within same function PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); - } else - { ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - } return true; } @@ -778,10 +679,6 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) // far jump debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet"); return false; - - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - //ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } } } @@ -2190,70 +2087,40 @@ bool PPCRecompilerImlGen_LSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, rD, rA, nb); if( nb == 0 ) nb = 32; - if( nb == 4 ) + + if (rA == 0) { - // if nb == 4 this instruction immitates LWZ - if( rA == 0 ) - { -#ifdef CEMU_DEBUG_ASSERT - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif - return false; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true); - return true; + cemu_assert_unimplemented(); // special form where gpr is ignored and EA is 0 + return false; } - else if( nb == 2 ) + + // potential optimization: On x86 unaligned access is allowed and we could handle the case nb==4 with a single memory read, and nb==2 with a memory read and shift + + uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 memOffset = 0; + while (nb > 0) { - // if nb == 2 this instruction immitates a LHZ but the result is shifted left by 16 bits - if( rA == 0 ) - { -#ifdef CEMU_DEBUG_ASSERT - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif + if (rD == rA) return false; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, 
true); - // shift - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, destinationRegister, destinationRegister, 16); - return true; - } - else if( nb == 3 ) - { - // if nb == 3 this instruction loads a 3-byte big-endian and the result is shifted left by 8 bits - if( rA == 0 ) + cemu_assert(rD < 32); + uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + // load bytes one-by-one + for (sint32 b = 0; b < 4; b++) { -#ifdef CEMU_DEBUG_ASSERT - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif - return false; + ppcImlGenContext->emitInst().make_r_memory(tmpReg, memReg, memOffset + b, 8, false, false); + sint32 shiftAmount = (3 - b) * 8; + if(shiftAmount) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, tmpReg, tmpReg, shiftAmount); + ppcImlGenContext->emitInst().make_r_r(b == 0 ? PPCREC_IML_OP_ASSIGN : PPCREC_IML_OP_OR, destinationRegister, tmpReg); + nb--; + if (nb == 0) + break; } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, PPC_REC_STORE_LSWI_3, false, true); - return true; + memOffset += 4; + rD++; } - debug_printf("PPCRecompilerImlGen_LSWI(): Unsupported nb value %d\n", nb); - return false; + return true; } bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2262,38 +2129,32 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_X(opcode, rS, rA, nb); if( nb == 0 ) nb = 32; - if( nb == 4 ) - { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, 32, true); - return true; - } - else if( nb == 2 ) - { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store half-word (shifted << 16) - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_2, false); - return true; - } - else if( nb == 3 ) + + uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 memOffset = 0; + while (nb > 0) { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store 3-byte-word (shifted << 8) - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_3, false); - return true; + if (rS == rA) + return false; + cemu_assert(rS < 32); + uint32 dataRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + // store bytes one-by-one + for (sint32 b = 0; b < 4; b++) + { + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpReg, dataRegister); + sint32 shiftAmount = (3 - b) * 8; + if (shiftAmount) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, tmpReg, tmpReg, shiftAmount); + ppcImlGenContext->emitInst().make_memory_r(tmpReg, memReg, memOffset + b, 8, false); + nb--; + if (nb == 0) + break; + } + memOffset += 4; + rS++; } - debug_printf("PPCRecompilerImlGen_STSWI(): Unsupported nb value %d\n", nb); - return false; + return true; } bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -4293,9 +4154,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) { - ppcImlGenContext.functionRef = ppcRecFunc; // todo - remove this and replace internally with boundary tracker ppcImlGenContext.boundaryTracker = &boundaryTracker; - if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) return false; @@ -4343,8 +4202,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; - - PPCRecompilerImlGen_generateNewInstruction_noOp(&ppcImlGenContext, lastInstruction); + lastInstruction->make_no_op(); // append conditional moves based on branch condition for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) From 832b761c5ec9fdaa9dbc93439333aee2442ab513 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 13 Dec 2022 19:19:29 +0100 Subject: [PATCH 19/64] PPCRec: Make register pool for RA configurable --- .../Recompiler/BackendX64/BackendX64.cpp | 375 ++++++++---------- .../Recompiler/BackendX64/BackendX64.h | 10 +- .../Recompiler/BackendX64/BackendX64FPU.cpp | 24 +- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 3 - .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 98 ----- .../Recompiler/IML/IMLRegisterAllocator.cpp | 75 ++-- .../Recompiler/IML/IMLRegisterAllocator.h | 94 +++++ .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 17 +- 8 files changed, 332 insertions(+), 364 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 9ac9f23ff..997555955 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -8,18 +8,13 @@ #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" -sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping -{ - REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX -}; - /* * Remember 
current instruction output offset for reloc * The instruction generated after this method has been called will be adjusted */ -void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr) +void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr) { - x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, type, extraInfo); + x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, extraInfo); } /* @@ -121,7 +116,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) { - uint32 branchDstReg = tempToRealRegister(imlInstruction->op_macro.param); + uint32 branchDstReg = imlInstruction->op_macro.param; if(REG_RDX != branchDstReg) x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, branchDstReg); // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX @@ -323,11 +318,11 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, */ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterData = imlInstruction->op_storeLoad.registerData; + sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; if( indexed && realRegisterMem == realRegisterMem2 ) { return false; @@ -449,11 +444,11 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p */ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterData = imlInstruction->op_storeLoad.registerData; + sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if (indexed) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; if (indexed && realRegisterMem == realRegisterMem2) { @@ -588,11 +583,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // registerResult = registerA if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) { // since MOV doesn't set eflags we need another test instruction - 
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -603,7 +598,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else { - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) @@ -611,24 +606,24 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // registerResult = endianSwap32(registerA) if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) assert_dbg(); - x64Gen_bswap_reg64Lower32bit(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { // registerResult += registerA PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -645,17 +640,17 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= registerA - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= registerA - x64Gen_and_reg64Low32_reg64Low32(x64GenContext, 
tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_and_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else { // registerResult ^= registerA - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -669,15 +664,15 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // NOT destination register - x64Gen_not_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); // update cr bits if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { // NOT instruction does not update flags, so we have to generate an additional TEST instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -690,20 +685,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) { - x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerA), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerA, imlInstruction->op_r_r.registerA); sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); - x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32-1); + x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); + x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32-1); 
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); } } @@ -725,7 +720,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); // create compare instruction - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // set cr bits sint32 crRegister = imlInstruction->crRegister; if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) @@ -757,10 +752,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // NEG destination register - x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); // update cr bits if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -774,12 +769,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // copy operand to result if different registers if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // copy xer_ca to eflags carry x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 0); + x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 0); // update xer carry x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) @@ -799,19 +794,19 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // copy operand to result if different registers if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, 
imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // copy xer_ca to eflags carry x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), (uint32)-1); + x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1); // update xer carry x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { // set cr bits sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } } @@ -819,8 +814,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { // registerResult = ~registerOperand1 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r.registerA); + sint32 rRegResult = imlInstruction->op_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r.registerA; // copy operand to result register x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); // execute NOT on result @@ -850,12 +845,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { // registerResult = (uint32)(sint32)(sint16)registerA PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), reg32ToReg16(tempToRealRegister(imlInstruction->op_r_r.registerA))); + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -871,8 +866,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, 
imlInstruction->op_r_r.registerResult); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -881,7 +876,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else { // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -902,7 +897,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { // registerResult = immS32 cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { @@ -912,7 +907,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { assert_dbg(); } - x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { @@ -927,13 +922,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, // update cr register assert_dbg(); } - x64Gen_sub_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_and_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) @@ -951,14 +946,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, // registerResult |= immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_or_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { // registerResult ^= immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); 
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_xor_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { @@ -967,7 +962,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( (imlInstruction->op_r_immS32.immS32&0x80) ) assert_dbg(); // should not happen - x64Gen_rol_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint8)imlInstruction->op_r_immS32.immS32); + x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) { @@ -989,7 +984,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); // create compare instruction - x64Gen_cmp_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), imlInstruction->op_r_immS32.immS32); + x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32); // set cr bits uint32 crRegister = imlInstruction->crRegister; if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) @@ -1017,7 +1012,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 destRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); + uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); for(sint32 f=0; f<32; f++) { @@ -1028,7 +1023,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 srcRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); + uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); for (sint32 f = 0; f < 32; f++) { @@ -1066,17 +1061,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR { if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? 
X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } } @@ -1084,17 +1079,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR { if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } } @@ -1102,17 +1097,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR { if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? 
X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } } @@ -1120,9 +1115,9 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); if (imlInstruction->op_conditional_r_s32.bitMustBeSet) - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); else - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } return false; @@ -1134,9 +1129,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 + registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) @@ -1197,9 +1192,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 - registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, 
ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if( rRegOperand1 == rRegOperand2 ) { // result = operand1 - operand1 -> 0 @@ -1241,9 +1236,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 - registerOperand2 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if( rRegOperand1 == rRegOperand2 ) { // copy xer_ca to eflags carry @@ -1295,9 +1290,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 * registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it @@ -1334,9 +1329,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { return false; } - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperandA = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperandB = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperandA = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperandB = imlInstruction->op_r_r_r.registerB; // update carry flag // carry flag is detected this way: //if ((~a+b) < a) { @@ -1402,9 +1397,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) { @@ -1455,9 +1450,9 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; // todo: Use BMI2 rotate if available // check if CL/ECX/RCX is available if( rRegResult != REG_RCX && rRegOperand1 != REG_RCX && rRegOperand2 != REG_RCX ) @@ -1502,9 +1497,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; // save cr if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -1576,9 +1571,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); @@ -1621,9 +1616,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, 
temporaryGPR[1]), REG_EDX); @@ -1669,9 +1664,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 | ~registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); @@ -1705,8 +1700,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // registerResult = registerOperand + immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; if( rRegResult != rRegOperand ) { @@ -1719,8 +1714,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction { // registerResult = registerOperand + immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; if( rRegResult != rRegOperand ) { @@ -1749,8 +1744,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // registerResult = immS32 - registerOperand PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32; if( rRegResult != rRegOperand ) { @@ -1799,23 +1794,23 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // save cr cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy rS to temporary register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA); // rotate destination register if( sh ) x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); // AND destination register with inverted mask - x64Gen_and_reg64Low32_imm32(x64GenContext, 
tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), ~mask); + x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, ~mask); // AND temporary rS register with mask x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask); // OR result with temporary - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), REG_RESV_TEMP); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, REG_RESV_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand * immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize if( rRegResult != rRegOperand ) @@ -1829,7 +1824,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32; // MOV registerResult, registerOperand (if different) if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); // todo: Detect if we don't need to update carry // generic case // TEST registerResult, (1<<(SH+1))-1 @@ -1838,11 +1833,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction caTestMask = 0x7FFFFFFF; else caTestMask = (1 << (sh)) - 1; - x64Gen_test_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), caTestMask); + x64Gen_test_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, caTestMask); // SETNE/NZ [ESP+XER_CA] x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); // SAR registerResult, SH - x64Gen_sar_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), sh); + x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh); // JNS (if sign not set) sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here @@ -1854,7 +1849,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT)); 
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); @@ -1866,17 +1861,17 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // MOV registerResult, registerOperand (if different) if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); // Shift if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ) - x64Gen_shl_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); + x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); else - x64Gen_shr_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); + x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); // CR update if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { // since SHL/SHR only modifies the OF flag we need another TEST reg,reg here - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } } @@ -1894,7 +1889,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { // jump always cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmp_imm32(x64GenContext, 0); } else @@ -1904,7 +1899,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) { // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); sint32 condition = imlInstruction->op_conditionalJump.condition; if( condition == PPCREC_JUMP_CONDITION_E ) x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); @@ -1922,19 +1917,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, 
imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); return true; } @@ -1943,19 +1938,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } @@ -1964,19 +1959,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } @@ -1985,7 +1980,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec } x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); cemu_assert_debug(imlSegment->GetBranchTaken()); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, (void*)imlSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken()); if( imlInstruction->op_conditionalJump.bitMustBeSet ) { x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); @@ -2009,7 +2004,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction // BT x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, x64GenContext->currentSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken()); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); return true; } @@ -2063,28 +2058,28 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { sint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, 
tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, memOffset); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); } else assert_dbg(); @@ -2095,28 +2090,28 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.registerIndex); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { uint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.registerIndex); else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.registerIndex); else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.registerIndex); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, imlInstruction->op_r_name.registerIndex); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + 
sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); } else assert_dbg(); @@ -2338,70 +2333,44 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo // fix relocs for(auto& relocIt : x64GenContext.relocateOffsetTable2) { - if(relocIt.type == X64_RELOC_LINK_TO_PPC || relocIt.type == X64_RELOC_LINK_TO_SEGMENT) - { - // if link to PPC, search for segment that starts with this offset - uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo; - uint32 x64Offset = 0xFFFFFFFF; - if (relocIt.type == X64_RELOC_LINK_TO_PPC) - { - cemu_assert_suspicious(); - //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - //{ - // if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) - // { - // x64Offset = segIt->x64Offset; - // break; - // } - //} - //if (x64Offset == 0xFFFFFFFF) - //{ - // debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); - // // todo: Cleanup - // return false; - //} - } - else + // search for segment that starts with this offset + uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo; + uint32 x64Offset = 0xFFFFFFFF; + + IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; + x64Offset = destSegment->x64Offset; + + uint32 relocBase = relocIt.offset; + uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; + if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) + { + // Jcc relativeImm32 + sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2)); + if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled { - IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; - x64Offset = destSegment->x64Offset; + // convert to near Jcc + *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70); + // patch offset + *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump; + // replace unused 4 bytes with NOP instruction + relocInstruction[2] = 0x0F; + relocInstruction[3] = 0x1F; + relocInstruction[4] = 0x40; + relocInstruction[5] = 0x00; } - uint32 relocBase = relocIt.offset; - uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; - if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) - { - // Jcc relativeImm32 - sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2)); - if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled - { - // convert to near Jcc - *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70); - // patch offset - *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump; - // replace unused 4 bytes with NOP instruction - relocInstruction[2] = 0x0F; - relocInstruction[3] = 0x1F; - relocInstruction[4] = 0x40; - relocInstruction[5] = 0x00; - } - else - { - // patch offset - *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6)); - } - } - else if( relocInstruction[0] == 0xE9 ) + else { - // JMP relativeImm32 - *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5)); + // patch offset + *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6)); } - else - assert_dbg(); } - else + else if( relocInstruction[0] == 0xE9 ) { - assert_dbg(); + // JMP relativeImm32 + *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5)); } + else + 
assert_dbg(); } // copy code to executable memory diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 5a2b75000..347f2ea1f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -3,10 +3,9 @@ struct x64RelocEntry_t { - x64RelocEntry_t(uint32 offset, uint8 type, void* extraInfo) : offset(offset), type(type), extraInfo(extraInfo) {}; + x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {}; uint32 offset; - uint8 type; void* extraInfo; }; @@ -97,10 +96,6 @@ struct x64GenContext_t #define REG_RESV_FPR_TEMP (15) -extern sint32 x64Gen_registerMap[12]; - -#define tempToRealRegister(__x) (x64Gen_registerMap[__x]) -#define tempToRealFPRRegister(__x) (__x) #define reg32ToReg16(__x) (__x) enum @@ -128,9 +123,6 @@ enum #define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) #define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) -#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset -#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment - #define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index b70a9a319..5bb2505d3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -10,11 +10,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -27,11 +27,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, 
offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -268,11 +268,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData; + sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; uint8 mode = imlInstruction->op_storeLoad.mode; if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ) @@ -384,7 +384,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 || mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) { - PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); + PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); } else { @@ -579,11 +579,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData; + sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; uint8 mode = imlInstruction->op_storeLoad.mode; if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 ) { @@ -670,7 +670,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 || mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) { - PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); + 
PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 72a2d3f5d..3dcd50b6f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -24,9 +24,6 @@ void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcIml void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext); -// register allocator -void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext); - // debug void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index a09d4bab1..ae3c6c799 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,104 +6,6 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" -struct replacedRegisterTracker_t -{ - struct - { - sint32 instructionIndex; - sint32 registerPreviousName; - sint32 registerNewName; - sint32 index; // new index - sint32 previousIndex; // previous index (always out of range) - bool nameMustBeMaintained; // must be stored before replacement and loaded after replacement ends - }replacedRegisterEntry[PPC_X64_GPR_USABLE_REGISTERS]; - sint32 count; -}; - -bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed) -{ - IMLUsedRegisters registersUsed; - imlSegment->imlList[imlIndexStart].CheckRegisterUsage(®istersUsed); - // mask all registers used by this instruction - uint32 instructionReservedRegisterMask = 0; - if( registersUsed.readNamedReg1 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg1)); - if( registersUsed.readNamedReg2 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg2)); - if( registersUsed.readNamedReg3 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg3)); - if( registersUsed.writtenNamedReg1 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.writtenNamedReg1)); - // mask all registers that are reserved for other replacements - uint32 replacementReservedRegisterMask = 0; - for(sint32 i=0; icount; i++) - { - replacementReservedRegisterMask |= (1<replacedRegisterEntry[i].index); - } - - // potential improvement: Scan ahead a few instructions and look for registers that are the least used (or ideally never used) - - // pick available register - const uint32 allRegisterMask = (1<<(PPC_X64_GPR_USABLE_REGISTERS+1))-1; // mask with set bit for every register - uint32 reservedRegisterMask = instructionReservedRegisterMask | replacementReservedRegisterMask; - cemu_assert(instructionReservedRegisterMask != allRegisterMask); // no usable register! 
(Need to store a register from the replacedRegisterTracker) - sint32 usedRegisterIndex = -1; - for(sint32 i=0; imappedRegister[i] != -1 ) - { - // register is reserved by segment -> In use - *isUsed = true; - *registerName = ppcImlGenContext->mappedRegister[i]; - } - else - { - *isUsed = false; - *registerName = -1; - } - *registerIndex = i; - return true; - } - } - return false; - -} - -void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, replacedRegisterTracker_t* replacedRegisterTracker, sint32 registerTrackerIndex, sint32* imlIndex) -{ - // store register - sint32 imlIndexEdit = *imlIndex; - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - // name_unusedRegister = unusedRegister - IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit + 0]; - memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr.type = PPCREC_IML_TYPE_NAME_R; - imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerNewName; - imlIndexEdit++; - // load new register if required - if( replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].nameMustBeMaintained ) - { - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit]; - memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr.type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerPreviousName;//ppcImlGenContext->mappedRegister[replacedRegisterTracker.replacedRegisterEntry[i].index]; - imlIndexEdit += 1; - } - // move last entry to current one - memcpy(replacedRegisterTracker->replacedRegisterEntry+registerTrackerIndex, replacedRegisterTracker->replacedRegisterEntry+replacedRegisterTracker->count-1, sizeof(replacedRegisterTracker->replacedRegisterEntry[0])); - replacedRegisterTracker->count--; - *imlIndex = imlIndexEdit; -} - bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) { // only xmm0 to xmm14 may be used, xmm15 is reserved diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 1dd1f7ba3..1b720d26d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -2,10 +2,16 @@ #include "../PPCRecompiler.h" #include "../PPCRecompilerIml.h" +#include "IMLRegisterAllocator.h" #include "IMLRegisterAllocatorRanges.h" #include "../BackendX64/BackendX64.h" +struct IMLRegisterAllocatorContext +{ + IMLRegisterAllocatorParameters* raParam; +}; + uint32 recRACurrentIterationIndex = 0; uint32 PPCRecRA_getNextIterationIndex() @@ -212,10 +218,10 @@ typedef struct sint32 liveRangesCount; }raLiveRangeInfo_t; -// return a bitmask that contains only registers that are not used by any colliding range -uint32 
PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range) +// mark occupied registers by any overlapping range as unavailable in physRegSet +void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet) { - uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1; + //uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1; for (auto& subrange : range->list_subranges) { IMLSegment* imlSegment = subrange->imlSegment; @@ -233,14 +239,13 @@ uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range) (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) { - if(subrangeItr->range->physicalRegister >= 0) - physRegisterMask &= ~(1<<(subrangeItr->range->physicalRegister)); + if (subrangeItr->range->physicalRegister >= 0) + physRegSet.SetReserved(subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } } - return physRegisterMask; } bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; } @@ -326,7 +331,7 @@ void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegm // todo } -bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // sort subranges ascending by start index _sortSegmentAllSubrangesLinkedList(imlSegment); @@ -380,24 +385,22 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe continue; } // find free register for this segment - uint32 physRegisterMask = (1<physicalRegisterPool; + for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) { raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->range->physicalRegister < 0) - assert_dbg(); - physRegisterMask &= ~(1<range->physicalRegister); + cemu_assert_debug(liverange->range->physicalRegister >= 0); + physRegSet.SetReserved(liverange->range->physicalRegister); } // check intersections with other ranges and determine allowed registers - uint32 allowedPhysRegisterMask = 0; - uint32 unusedRegisterMask = physRegisterMask; // mask of registers that are currently not used (does not include range checks) - if (physRegisterMask != 0) + IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments) + if(physRegSet.HasAnyAvailable()) { - // check globally - allowedPhysRegisterMask = PPCRecRA_getAllowedRegisterMaskForFullRange(subrangeItr->range); - physRegisterMask &= allowedPhysRegisterMask; + // check globally in all segments + PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr->range, physRegSet); } - if (physRegisterMask == 0) + if (!physRegSet.HasAnyAvailable()) { struct { @@ -480,14 +483,16 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe spillStrategies.availableRegisterHole.physRegister = -1; if (currentIndex >= 0) { - if (unusedRegisterMask != 0) + if (localAvailableRegsMask.HasAnyAvailable()) { - for (sint32 t = 0; t < PPC_X64_GPR_USABLE_REGISTERS; t++) + sint32 physRegItr = -1; + while (true) { - if ((unusedRegisterMask&(1 << t)) == 0) - continue; + physRegItr = 
localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1); + if (physRegItr < 0) + break; // get size of potential hole for this register - sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, t); + sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, physRegItr); if (distance < 2) continue; // not worth consideration // calculate additional cost due to split @@ -500,7 +505,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe { spillStrategies.availableRegisterHole.cost = cost; spillStrategies.availableRegisterHole.distance = distance; - spillStrategies.availableRegisterHole.physRegister = t; + spillStrategies.availableRegisterHole.physRegister = physRegItr; } } } @@ -611,16 +616,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe return false; } // assign register to range - sint32 registerIndex = -1; - for (sint32 f = 0; f < PPC_X64_GPR_USABLE_REGISTERS; f++) - { - if ((physRegisterMask&(1 << f)) != 0) - { - registerIndex = f; - break; - } - } - subrangeItr->range->physicalRegister = registerIndex; + subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg(); // add to live ranges liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; liveInfo.liveRangesCount++; @@ -630,7 +626,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe return true; } -void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) +void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext) { // start with frequently executed segments first sint32 maxLoopDepth = 0; @@ -647,7 +643,7 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) { if (segIt->loopDepth != d) continue; - done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, segIt); + done = PPCRecRA_assignSegmentRegisters(ctx, ppcImlGenContext, segIt); if (done == false) break; } @@ -997,8 +993,11 @@ void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGen } } -void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext) +void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam) { + IMLRegisterAllocatorContext ctx; + ctx.raParam = &raParam; + PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext); ppcImlGenContext->raInfo.list_ranges = std::vector(); @@ -1006,7 +1005,7 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_assignRegisters(ppcImlGenContext); + PPCRecRA_assignRegisters(ctx, ppcImlGenContext); PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext); PPCRecRA_generateMoveInstructions(ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index e69de29bb..87e36b00c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -0,0 +1,94 @@ + +// container for storing a set of register indices +// specifically optimized towards storing physical register indices (expected to be below 64) +class IMLPhysRegisterSet +{ +public: + void SetAvailable(uint32 index) + { + cemu_assert_debug(index < 64); + m_regBitmask |= ((uint64)1 << 
index); + } + + void SetReserved(uint32 index) + { + cemu_assert_debug(index < 64); + m_regBitmask &= ~((uint64)1 << index); + } + + bool IsAvailable(uint32 index) const + { + return (m_regBitmask & (1 << index)) != 0; + } + + IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other) + { + this->m_regBitmask &= other.m_regBitmask; + return *this; + } + + IMLPhysRegisterSet& operator=(const IMLPhysRegisterSet& other) + { + this->m_regBitmask = other.m_regBitmask; + return *this; + } + + bool HasAnyAvailable() const + { + return m_regBitmask != 0; + } + + // returns index of first available register. Do not call when HasAnyAvailable() == false + uint32 GetFirstAvailableReg() + { + cemu_assert_debug(m_regBitmask != 0); + uint32 regIndex = 0; + auto tmp = m_regBitmask; + while ((tmp & 0xFF) == 0) + { + regIndex += 8; + tmp >>= 8; + } + while ((tmp & 0x1) == 0) + { + regIndex++; + tmp >>= 1; + } + return regIndex; + } + + // returns index of next available register (search includes any register index >= startIndex) + // returns -1 if there is no more register + sint32 GetNextAvailableReg(sint32 startIndex) + { + if (startIndex >= 64) + return -1; + uint32 regIndex = startIndex; + auto tmp = m_regBitmask; + tmp >>= regIndex; + if (!tmp) + return -1; + while ((tmp & 0xFF) == 0) + { + regIndex += 8; + tmp >>= 8; + } + while ((tmp & 0x1) == 0) + { + regIndex++; + tmp >>= 1; + } + return regIndex; + } + +private: + uint64 m_regBitmask{ 0 }; +}; + + +struct IMLRegisterAllocatorParameters +{ + IMLPhysRegisterSet physicalRegisterPool; +}; + +void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 2565e3ee5..838b61f50 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -14,6 +14,7 @@ #include "util/MemMapper/MemMapper.h" #include "IML/IML.h" +#include "IML/IMLRegisterAllocator.h" #include "BackendX64/BackendX64.h" struct PPCInvalidationRange @@ -272,7 +273,21 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) } } - IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext); + IMLRegisterAllocatorParameters raParam; + raParam.physicalRegisterPool.SetAvailable(REG_RAX); + raParam.physicalRegisterPool.SetAvailable(REG_RDX); + raParam.physicalRegisterPool.SetAvailable(REG_RBX); + raParam.physicalRegisterPool.SetAvailable(REG_RBP); + raParam.physicalRegisterPool.SetAvailable(REG_RSI); + raParam.physicalRegisterPool.SetAvailable(REG_RDI); + raParam.physicalRegisterPool.SetAvailable(REG_R8); + raParam.physicalRegisterPool.SetAvailable(REG_R9); + raParam.physicalRegisterPool.SetAvailable(REG_R10); + raParam.physicalRegisterPool.SetAvailable(REG_R11); + raParam.physicalRegisterPool.SetAvailable(REG_R12); + raParam.physicalRegisterPool.SetAvailable(REG_RCX); + + IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); // remove redundant name load and store instructions PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); From 53139cd8b27be1600fb343c8684ef575d6ae6ade Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 14 Dec 2022 06:52:06 +0100 Subject: [PATCH 20/64] PPCRec: Rename register constants to avoid name collision --- .../Recompiler/BackendX64/BackendX64.cpp | 394 +++++++++--------- .../Recompiler/BackendX64/BackendX64.h | 106 
++--- .../Recompiler/BackendX64/BackendX64FPU.cpp | 80 ++-- .../Recompiler/BackendX64/BackendX64Gen.cpp | 36 +- .../BackendX64/BackendX64GenFPU.cpp | 46 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 24 +- 6 files changed, 330 insertions(+), 356 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 997555955..79e1da8f9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -70,11 +70,11 @@ void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcIm { sint32 crRegister = imlInstruction->crRegister; if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by TEST + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by TEST if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); // todo: Set CR SO if XER SO bit is set PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, crRegister, PPCREC_CR_STATE_TYPE_LOGICAL); } @@ -117,8 +117,8 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) { uint32 branchDstReg = imlInstruction->op_macro.param; - if(REG_RDX != branchDstReg) - x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, branchDstReg); + if(X86_REG_RDX != branchDstReg) + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg); // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX // JMP [offset+RDX*(8/4)+R15] x64Gen_writeU8(x64GenContext, 0x41); @@ -132,10 +132,10 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, { // MOV DWORD [SPR_LinkRegister], newLR uint32 newLR = imlInstruction->op_macro.param + 4; - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), newLR); + x64Gen_mov_mem32Reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), newLR); // remember new instruction pointer in RDX uint32 newIP = imlInstruction->op_macro.param2; - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, newIP); + x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP); // since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + 
(uint64)newIP * 2ULL; if (lookupOffset >= 0x80000000ULL) @@ -160,7 +160,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, { // remember new instruction pointer in RDX uint32 newIP = imlInstruction->op_macro.param2; - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, newIP); + x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP); // Since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; if (lookupOffset >= 0x80000000ULL) @@ -185,7 +185,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, { uint32 currentInstructionAddress = imlInstruction->op_macro.param; // remember PC value in REG_EDX - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, currentInstructionAddress); + x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, currentInstructionAddress); uint32 newIP = 0; // special value for recompiler exit uint64 lookupOffset = (uint64)&(((PPCRecompilerInstanceData_t*)NULL)->ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; @@ -205,7 +205,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES ) { uint32 cycleCount = imlInstruction->op_macro.param; - x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), cycleCount); + x64Gen_sub_mem32reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), cycleCount); return true; } else if( imlInstruction->operation == PPCREC_IML_MACRO_HLE ) @@ -214,58 +214,58 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, uint32 funcId = imlInstruction->op_macro.param2; //x64Gen_int3(x64GenContext); // update instruction pointer - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); + x64Gen_mov_mem32Reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); //// save hCPU (RSP) //x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)&ppcRecompilerX64_hCPUTemp); //x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_TEMP, 0, REG_RSP); // set parameters - x64Gen_mov_reg64_reg64(x64GenContext, REG_RCX, REG_RSP); - x64Gen_mov_reg64_imm64(x64GenContext, REG_RDX, funcId); + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, X86_REG_RSP); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, funcId); // restore stackpointer from executionContext/hCPU->rspTemp - x64Emit_mov_reg64_mem64(x64GenContext, REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); + x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); //x64Emit_mov_reg64_mem64(x64GenContext, REG_RSP, REG_R14, 0); //x64Gen_int3(x64GenContext); // reserve space on stack for call parameters - x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11); // must be uneven number in order to retain stack 0x10 alignment - x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0); + x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11); // must be uneven number in order to retain stack 0x10 alignment + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0); // call HLE function - x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_virtualHLE); - x64Gen_call_reg64(x64GenContext, REG_RAX); + 
x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_virtualHLE); + x64Gen_call_reg64(x64GenContext, X86_REG_RAX); // restore RSP to hCPU (from RAX, result of PPCRecompiler_virtualHLE) //x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)&ppcRecompilerX64_hCPUTemp); //x64Emit_mov_reg64_mem64Reg64(x64GenContext, REG_RSP, REG_RESV_TEMP, 0); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RSP, REG_RAX); + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RSP, X86_REG_RAX); // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(x64GenContext, REG_R15, (uint64)ppcRecompilerInstanceData); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R15, (uint64)ppcRecompilerInstanceData); // MOV R13, memory_base - x64Gen_mov_reg64_imm64(x64GenContext, REG_R13, (uint64)memory_base); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R13, (uint64)memory_base); // check if cycles where decreased beyond zero, if yes -> leave recompiler - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0); //x64Gen_int3(x64GenContext); //x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, ppcAddress); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RDX, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RDX, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); // set EAX to 0 (we assume that ppcRecompilerDirectJumpTable[0] will be a recompiler escape function) - x64Gen_xor_reg32_reg32(x64GenContext, REG_RAX, REG_RAX); + x64Gen_xor_reg32_reg32(x64GenContext, X86_REG_RAX, X86_REG_RAX); // ADD RAX, R15 (R15 -> Pointer to ppcRecompilerInstanceData - x64Gen_add_reg64_reg64(x64GenContext, REG_RAX, REG_R15); + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_R15); //// JMP [recompilerCallTable+EAX/4*8] //x64Gen_int3(x64GenContext); - x64Gen_jmp_memReg64(x64GenContext, REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); // check if instruction pointer was changed // assign new instruction pointer to EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_RAX, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); // remember instruction pointer in REG_EDX - x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, REG_RAX); + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, X86_REG_RAX); // EAX *= 2 - x64Gen_add_reg64_reg64(x64GenContext, REG_RAX, REG_RAX); + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_RAX); // ADD RAX, R15 (R15 -> Pointer to ppcRecompilerInstanceData - x64Gen_add_reg64_reg64(x64GenContext, REG_RAX, REG_R15); + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_R15); // JMP [ppcRecompilerDirectJumpTable+RAX/4*8] - x64Gen_jmp_memReg64(x64GenContext, REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, 
ppcRecompilerDirectJumpTable)); return true; } else if( imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) @@ -277,32 +277,32 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, uint32 sprId = imlInstruction->op_macro.param2&0xFFFF; uint32 gprIndex = (imlInstruction->op_macro.param2>>16)&0x1F; // update instruction pointer - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); + x64Gen_mov_mem32Reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); // set parameters - x64Gen_mov_reg64_reg64(x64GenContext, REG_RCX, REG_RSP); - x64Gen_mov_reg64_imm64(x64GenContext, REG_RDX, gprIndex); + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, X86_REG_RSP); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, gprIndex); // restore stackpointer to original RSP - x64Emit_mov_reg64_mem64(x64GenContext, REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); + x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); // push hCPU on stack - x64Gen_push_reg64(x64GenContext, REG_RCX); + x64Gen_push_reg64(x64GenContext, X86_REG_RCX); // reserve space on stack for call parameters - x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11 + 8); - x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0); + x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11 + 8); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0); // call function if( sprId == SPR_TBL ) - x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBL); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_getTBL); else if( sprId == SPR_TBU ) - x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBU); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_getTBU); else assert_dbg(); - x64Gen_call_reg64(x64GenContext, REG_RAX); + x64Gen_call_reg64(x64GenContext, X86_REG_RAX); // restore hCPU from stack - x64Gen_add_reg64_imm32(x64GenContext, REG_RSP, 8 * 11 + 8); - x64Gen_pop_reg64(x64GenContext, REG_RSP); + x64Gen_add_reg64_imm32(x64GenContext, X86_REG_RSP, 8 * 11 + 8); + x64Gen_pop_reg64(x64GenContext, X86_REG_RSP); // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(x64GenContext, REG_R15, (uint64)ppcRecompilerInstanceData); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R15, (uint64)ppcRecompilerInstanceData); // MOV R13, memory_base - x64Gen_mov_reg64_imm64(x64GenContext, REG_R13, (uint64)memory_base); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R13, (uint64)memory_base); return true; } else @@ -350,20 +350,20 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p { if (indexed) { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); //if (indexed && realRegisterMem != realRegisterData) // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } else { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); } } else { if (indexed) 
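 			// note: for indexed loads the effective address (realRegisterMem + realRegisterMem2) has presumably
 			// already been combined into REG_RESV_TEMP earlier in this function (outside this hunk), which is
 			// why REG_RESV_TEMP rather than realRegisterMem2 is passed as the index register below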
{ - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); //if (realRegisterMem != realRegisterData) // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if (switchEndian) @@ -371,7 +371,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } else { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if (switchEndian) x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); } @@ -386,13 +386,13 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } if(g_CPUFeatures.x86.movbe && switchEndian ) { - x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } else { - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( switchEndian ) @@ -411,9 +411,9 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( signExtend ) - x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); else - x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } @@ -424,15 +424,15 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p assert_dbg(); // not supported if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, 
X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( switchEndian ) x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation + x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation // LWARX instruction costs extra cycles (this speeds up busy loops) - x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); + x64Gen_sub_mem32reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); } else return false; @@ -484,9 +484,9 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if (g_CPUFeatures.x86.movbe && swapEndian) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); if (indexed) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } @@ -499,7 +499,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); if (indexed) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); if (indexed) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // todo: Optimize this, e.g. 
by using MOVBE @@ -526,9 +526,9 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, assert_dbg(); // todo // reset cr0 LT, GT and EQ sint32 crRegister = 0; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); // calculate effective address x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); if (swapEndian) @@ -541,32 +541,32 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); // EA matches reservation // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); + x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); // backup REG_RESV_MEMBASE x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE); // add mem register to REG_RESV_MEMBASE x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem); // load reserved value in EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); // bswap EAX - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_EAX); + x64Gen_bswap_reg64Lower32bit(x64GenContext, X86_REG_EAX); //x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, 0, REG_RESV_TEMP); x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); // reset reservation x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0); x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0); // restore EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); // restore REG_RESV_MEMBASE x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2])); // copy XER SO to CR0 SO - x64Gen_bt_mem8(x64GenContext, 
REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); @@ -726,21 +726,21 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) { if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); // todo: Also set summary overflow if xer bit is set } else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) { if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); // todo: Also set summary overflow if xer bit is set } else @@ -772,18 +772,18 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // add carry bit x64Gen_adc_reg64Low32_imm32(x64GenContext, 
imlInstruction->op_r_r.registerResult, 0); // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { // set cr bits sint32 crRegister = imlInstruction->crRegister; - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by AND/OR - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by AND/OR + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); // todo: Use different version of PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction) // todo: Also set summary overflow if xer bit is set } @@ -797,11 +797,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // add carry bit x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1); // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { // set cr bits @@ -821,11 +821,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // execute NOT on result x64Gen_not_reg64Low32(x64GenContext, rRegResult); // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // add carry x64Gen_adc_reg64Low32_imm32(x64GenContext, rRegResult, 0); // update carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); // update cr if requested if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -990,20 +990,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) { if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, REG_ESP, offsetof(PPCInterpreter_t, 
cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); } else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) { if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); } else assert_dbg(); @@ -1016,7 +1016,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); for(sint32 f=0; f<32; f++) { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); } } @@ -1029,9 +1029,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { if(((crBitMask >> f) & 1) == 0) continue; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); } } else @@ -1113,7 +1113,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR } } PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, 
x64GenContext); - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); if (imlInstruction->op_conditional_r_s32.bitMustBeSet) x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); else @@ -1141,7 +1141,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { if( addCarry ) { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } else @@ -1151,7 +1151,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { if( addCarry ) { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); } else @@ -1165,7 +1165,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // add operand2 if( addCarry ) { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } else @@ -1174,7 +1174,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // update carry if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) { - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } // set cr bits if enabled if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) @@ -1242,7 +1242,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, if( rRegOperand1 == rRegOperand2 ) { // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_cmc(x64GenContext); // result = operand1 - operand1 -> 0 x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); @@ -1250,7 +1250,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( rRegResult == rRegOperand1 ) { // copy inverted xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_cmc(x64GenContext); // result = result - operand2 x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); @@ -1261,7 +1261,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // NOT result x64Gen_not_reg64Low32(x64GenContext, rRegResult); // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // ADC result, operand1 x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); } @@ -1270,13 +1270,13 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // copy operand1 to destination register before doing addition x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_cmc(x64GenContext); // sub operand2 x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } // update carry flag (todo: is this actually correct in all cases?) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); // update cr0 if requested if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -1341,7 +1341,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // return true; //} // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // ((~a+b)<~a) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); @@ -1352,7 +1352,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); // OR ((~a+b+1)<1) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); @@ -1364,7 +1364,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); // do subtraction if( rRegOperandB == rRegOperandA ) @@ -1455,16 +1455,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; // todo: Use BMI2 rotate if available // check if CL/ECX/RCX is available - if( rRegResult != REG_RCX && rRegOperand1 != REG_RCX && rRegOperand2 != REG_RCX ) + if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX ) { // swap operand 2 with RCX - x64Gen_xchg_reg64_reg64(x64GenContext, REG_RCX, rRegOperand2); + x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); // move operand 1 to temp register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); // rotate x64Gen_rol_reg64Low32_cl(x64GenContext, REG_RESV_TEMP); // undo swap operand 2 with RCX - x64Gen_xchg_reg64_reg64(x64GenContext, REG_RCX, rRegOperand2); + 
x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); // copy to result register x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } @@ -1509,7 +1509,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // MOV registerResult, registerOperand (if different) x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); // reset carry - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000); sint32 jumpInstructionJumpToSignedShift = x64GenContext->codeBufferIndex; @@ -1547,7 +1547,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->codeBufferIndex); // arithmetic shift if( b == 5 ) @@ -1575,17 +1575,17 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); + x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); + x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); // mov operand 2 to temp register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); // mov operand1 to EAX - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_EAX, rRegOperand1); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1); // sign or zero extend EAX to EDX:EAX based on division sign mode if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED ) x64Gen_cdq(x64GenContext); else - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_EDX, REG_EDX); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX); // make sure we avoid division by zero x64Gen_test_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 3); @@ -1595,13 +1595,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else x64Gen_div_reg64Low32(x64GenContext, REG_RESV_TEMP); // result of division is now stored in EAX, move it to result register - if( rRegResult != REG_EAX ) - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_EAX); + if( rRegResult != X86_REG_EAX ) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EAX); // restore EAX / EDX - if( rRegResult != REG_RAX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - if( 
rRegResult != REG_RDX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EDX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); + if( rRegResult != X86_REG_RAX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + if( rRegResult != X86_REG_RDX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); // set cr bits if requested if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -1620,16 +1620,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); + x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); + x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); // mov operand 2 to temp register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); // mov operand1 to EAX - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_EAX, rRegOperand1); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1); if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED ) { // zero extend EAX to EDX:EAX - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_EDX, REG_EDX); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX); } else { @@ -1642,13 +1642,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else x64Gen_mul_reg64Low32(x64GenContext, REG_RESV_TEMP); // result of multiplication is now stored in EDX:EAX, move it to result register - if( rRegResult != REG_EDX ) - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_EDX); + if( rRegResult != X86_REG_EDX ) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EDX); // restore EAX / EDX - if( rRegResult != REG_RAX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - if( rRegResult != REG_RDX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EDX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); + if( rRegResult != X86_REG_RAX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + if( rRegResult != X86_REG_RDX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); // set cr bits if requested if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -1724,7 +1724,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction } x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); // update carry flag - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); // set cr bits if enabled if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -1753,7 +1753,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, 
rRegOperand); } // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // ((~a+b)<~a) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); @@ -1764,7 +1764,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); // OR ((~a+b+1)<1) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); @@ -1776,7 +1776,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); // do actual computation of value, note: a - b is equivalent to a + ~b + 1 x64Gen_not_reg64Low32(x64GenContext, rRegResult); @@ -1835,14 +1835,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction caTestMask = (1 << (sh)) - 1; x64Gen_test_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, caTestMask); // SETNE/NZ [ESP+XER_CA] - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); // SAR registerResult, SH x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh); // JNS (if sign not set) sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here // MOV BYTE [ESP+xer_ca], 0 - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // jump destination PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); // CR update @@ -1850,9 +1850,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction { sint32 crRegister = imlInstruction->crRegister; x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT)); - 
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); } } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || @@ -1978,7 +1978,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec } cemu_assert_debug(false); // should not reach? } - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); cemu_assert_debug(imlSegment->GetBranchTaken()); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken()); if( imlInstruction->op_conditionalJump.bitMustBeSet ) @@ -2002,7 +2002,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC // BT - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative + x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken()); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); @@ -2018,19 +2018,19 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR) { // clear cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 0); return true; } else if (imlInstruction->operation == PPCREC_IML_OP_CR_SET) { // set cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 1); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 1); return true; } else if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC || imlInstruction->operation == PPCREC_IML_OP_CR_AND || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC ) { - x64Emit_movZX_reg64_mem8(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crB); + x64Emit_movZX_reg64_mem8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crB); if (imlInstruction->operation == PPCREC_IML_OP_CR_ORC || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) { return false; // untested @@ -2038,11 +2038,11 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc x64Gen_xor_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); // 
complement } if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC) - x64Gen_or_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); + x64Gen_or_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); else - x64Gen_and_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); + x64Gen_and_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD); + x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD); return true; } @@ -2058,28 +2058,28 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { sint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, memOffset); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); } else assert_dbg(); @@ -2090,28 +2090,28 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, uint32 name 
= imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.registerIndex); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { uint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.registerIndex); else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.registerIndex); else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.registerIndex); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, imlInstruction->op_r_name.registerIndex); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); } else assert_dbg(); @@ -2392,21 +2392,21 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // start of recompiler entry function - x64Gen_push_reg64(&x64GenContext, REG_RAX); - x64Gen_push_reg64(&x64GenContext, REG_RCX); - x64Gen_push_reg64(&x64GenContext, REG_RDX); - x64Gen_push_reg64(&x64GenContext, REG_RBX); - x64Gen_push_reg64(&x64GenContext, REG_RBP); - x64Gen_push_reg64(&x64GenContext, REG_RDI); - x64Gen_push_reg64(&x64GenContext, REG_RSI); - x64Gen_push_reg64(&x64GenContext, REG_R8); - x64Gen_push_reg64(&x64GenContext, REG_R9); - x64Gen_push_reg64(&x64GenContext, REG_R10); - x64Gen_push_reg64(&x64GenContext, REG_R11); - x64Gen_push_reg64(&x64GenContext, REG_R12); - x64Gen_push_reg64(&x64GenContext, REG_R13); - x64Gen_push_reg64(&x64GenContext, REG_R14); - x64Gen_push_reg64(&x64GenContext, REG_R15); + x64Gen_push_reg64(&x64GenContext, X86_REG_RAX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RCX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RDX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RBX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RBP); + x64Gen_push_reg64(&x64GenContext, X86_REG_RDI); + x64Gen_push_reg64(&x64GenContext, X86_REG_RSI); + x64Gen_push_reg64(&x64GenContext, X86_REG_R8); + 
x64Gen_push_reg64(&x64GenContext, X86_REG_R9); + x64Gen_push_reg64(&x64GenContext, X86_REG_R10); + x64Gen_push_reg64(&x64GenContext, X86_REG_R11); + x64Gen_push_reg64(&x64GenContext, X86_REG_R12); + x64Gen_push_reg64(&x64GenContext, X86_REG_R13); + x64Gen_push_reg64(&x64GenContext, X86_REG_R14); + x64Gen_push_reg64(&x64GenContext, X86_REG_R15); // 000000007775EF04 | E8 00 00 00 00 call +0x00 x64Gen_writeU8(&x64GenContext, 0xE8); @@ -2421,37 +2421,37 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() x64Gen_writeU8(&x64GenContext, 0x24); uint32 jmpPatchOffset = x64GenContext.codeBufferIndex; x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP - x64Emit_mov_mem64_reg64(&x64GenContext, REG_RDX, offsetof(PPCInterpreter_t, rspTemp), REG_RSP); + x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP); // MOV RSP, RDX (ppc interpreter instance) - x64Gen_mov_reg64_reg64(&x64GenContext, REG_RSP, REG_RDX); + x64Gen_mov_reg64_reg64(&x64GenContext, X86_REG_RSP, X86_REG_RDX); // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(&x64GenContext, REG_R15, (uint64)ppcRecompilerInstanceData); + x64Gen_mov_reg64_imm64(&x64GenContext, X86_REG_R15, (uint64)ppcRecompilerInstanceData); // MOV R13, memory_base - x64Gen_mov_reg64_imm64(&x64GenContext, REG_R13, (uint64)memory_base); + x64Gen_mov_reg64_imm64(&x64GenContext, X86_REG_R13, (uint64)memory_base); //JMP recFunc - x64Gen_jmp_reg64(&x64GenContext, REG_RCX); // call argument 1 + x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1 x64GenContext.codeBuffer[jmpPatchOffset] = (x64GenContext.codeBufferIndex-(jmpPatchOffset-4)); //recompilerExit1: - x64Gen_pop_reg64(&x64GenContext, REG_R15); - x64Gen_pop_reg64(&x64GenContext, REG_R14); - x64Gen_pop_reg64(&x64GenContext, REG_R13); - x64Gen_pop_reg64(&x64GenContext, REG_R12); - x64Gen_pop_reg64(&x64GenContext, REG_R11); - x64Gen_pop_reg64(&x64GenContext, REG_R10); - x64Gen_pop_reg64(&x64GenContext, REG_R9); - x64Gen_pop_reg64(&x64GenContext, REG_R8); - x64Gen_pop_reg64(&x64GenContext, REG_RSI); - x64Gen_pop_reg64(&x64GenContext, REG_RDI); - x64Gen_pop_reg64(&x64GenContext, REG_RBP); - x64Gen_pop_reg64(&x64GenContext, REG_RBX); - x64Gen_pop_reg64(&x64GenContext, REG_RDX); - x64Gen_pop_reg64(&x64GenContext, REG_RCX); - x64Gen_pop_reg64(&x64GenContext, REG_RAX); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R15); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R14); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R13); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R12); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R11); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R10); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R9); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R8); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RSI); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RDI); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RBP); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RBX); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RDX); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RCX); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RAX); // RET x64Gen_ret(&x64GenContext); @@ -2473,10 +2473,10 @@ void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() // update instruction pointer // LR is in EDX - x64Emit_mov_mem32_reg32(&x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), REG_EDX); + x64Emit_mov_mem32_reg32(&x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), X86_REG_EDX); // MOV RSP, [ppcRecompilerX64_rspTemp] - 
x64Emit_mov_reg64_mem64(&x64GenContext, REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); + x64Emit_mov_reg64_mem64(&x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); // RET x64Gen_ret(&x64GenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 347f2ea1f..4cbf01a9f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -23,74 +23,48 @@ struct x64GenContext_t std::vector relocateOffsetTable2; }; -// Some of these are defined by winnt.h and gnu headers -#undef REG_EAX -#undef REG_ECX -#undef REG_EDX -#undef REG_EBX -#undef REG_ESP -#undef REG_EBP -#undef REG_ESI -#undef REG_EDI -#undef REG_NONE -#undef REG_RAX -#undef REG_RCX -#undef REG_RDX -#undef REG_RBX -#undef REG_RSP -#undef REG_RBP -#undef REG_RSI -#undef REG_RDI -#undef REG_R8 -#undef REG_R9 -#undef REG_R10 -#undef REG_R11 -#undef REG_R12 -#undef REG_R13 -#undef REG_R14 -#undef REG_R15 - -#define REG_EAX 0 -#define REG_ECX 1 -#define REG_EDX 2 -#define REG_EBX 3 -#define REG_ESP 4 // reserved for low half of hCPU pointer -#define REG_EBP 5 -#define REG_ESI 6 -#define REG_EDI 7 -#define REG_NONE -1 - -#define REG_RAX 0 -#define REG_RCX 1 -#define REG_RDX 2 -#define REG_RBX 3 -#define REG_RSP 4 // reserved for hCPU pointer -#define REG_RBP 5 -#define REG_RSI 6 -#define REG_RDI 7 -#define REG_R8 8 -#define REG_R9 9 -#define REG_R10 10 -#define REG_R11 11 -#define REG_R12 12 -#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet) -#define REG_R14 14 // reserved as temporary register -#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData - -#define REG_AL 0 -#define REG_CL 1 -#define REG_DL 2 -#define REG_BL 3 -#define REG_AH 4 -#define REG_CH 5 -#define REG_DH 6 -#define REG_BH 7 +// todo - these definitions are part of the x86_64 emitter. Not the backend itself. We should move them eventually +#define X86_REG_EAX 0 +#define X86_REG_ECX 1 +#define X86_REG_EDX 2 +#define X86_REG_EBX 3 +#define X86_REG_ESP 4 // reserved for low half of hCPU pointer +#define X86_REG_EBP 5 +#define X86_REG_ESI 6 +#define X86_REG_EDI 7 +#define X86_REG_NONE -1 + +#define X86_REG_RAX 0 +#define X86_REG_RCX 1 +#define X86_REG_RDX 2 +#define X86_REG_RBX 3 +#define X86_REG_RSP 4 // reserved for hCPU pointer +#define X86_REG_RBP 5 +#define X86_REG_RSI 6 +#define X86_REG_RDI 7 +#define X86_REG_R8 8 +#define X86_REG_R9 9 +#define X86_REG_R10 10 +#define X86_REG_R11 11 +#define X86_REG_R12 12 +#define X86_REG_R13 13 // reserved to hold pointer to memory base? 
(Not decided yet) +#define X86_REG_R14 14 // reserved as temporary register +#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData + +#define X86_REG_AL 0 +#define X86_REG_CL 1 +#define X86_REG_DL 2 +#define X86_REG_BL 3 +#define X86_REG_AH 4 +#define X86_REG_CH 5 +#define X86_REG_DH 6 +#define X86_REG_BH 7 // reserved registers -#define REG_RESV_TEMP (REG_R14) -#define REG_RESV_HCPU (REG_RSP) -#define REG_RESV_MEMBASE (REG_R13) -#define REG_RESV_RECDATA (REG_R15) +#define REG_RESV_TEMP (X86_REG_R14) +#define REG_RESV_HCPU (X86_REG_RSP) +#define REG_RESV_MEMBASE (X86_REG_R13) +#define REG_RESV_RECDATA (X86_REG_R15) // reserved floating-point registers #define REG_RESV_FPR_TEMP (15) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 5bb2505d3..ed2fb7d9d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -10,11 +10,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -27,11 +27,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -75,7 +75,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, assert_dbg(); } // optimized code for ps float load - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memImmS32); + x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memImmS32); x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower 
DWORD x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP); @@ -116,8 +116,8 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, } else { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP); - x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP); + x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); } x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP); // load constant 1.0 into lower half and upper half of temp register @@ -179,7 +179,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, if (readSize == 16) { // half word - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memOffset); + x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memOffset); x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // endian swap if (isSigned) x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); @@ -189,7 +189,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, else if (readSize == 8) { // byte - x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memOffset); + x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memOffset); if (isSigned) x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); else @@ -318,14 +318,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); // load value - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); + x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); } else { - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); + x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); @@ -339,31 +339,31 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); // load double low part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); x64Gen_bswap_reg64Lower32bit(x64GenContext, 
REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); // calculate offset again x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); // load double high part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4); + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); // load double from temporaryFPR - x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); + x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); } else { // load double low part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); // load double high part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4); + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); // load double from temporaryFPR - x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); + x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); } } } @@ -422,9 +422,9 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); } if (g_CPUFeatures.x86.movbe) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); + x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, memReg, memImmS32, REG_RESV_TEMP); else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, memReg, memImmS32, REG_RESV_TEMP); if (indexed) { x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); @@ -439,7 +439,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext 
x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); - x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memImmS32); + x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memImmS32); return; } // store as integer @@ -606,9 +606,9 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } if(g_CPUFeatures.x86.movbe) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); if( indexed ) { x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); @@ -622,15 +622,15 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti assert_dbg(); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); + x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); // store double low part - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0); + x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP); // store double high part - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4); + x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP); if( indexed ) { x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); @@ -645,12 +645,12 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti if( realRegisterMem == realRegisterMem2 ) assert_dbg(); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, 
imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } else { - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); } } else if(mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || @@ -872,18 +872,18 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction // update cr sint32 crRegister = imlInstruction->crRegister; // if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); + x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); + x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp 
b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 33ff52ac1..1094693a9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -60,7 +60,7 @@ void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataReg forceUseOffset = true; } - if (memRegisterB64 == REG_NONE) + if (memRegisterB64 == X86_REG_NONE) { // memRegisterA64 + memImmS32 uint8 modRM = (dataRegister & 7) * 8 + (memRegisterA64 & 7); @@ -345,7 +345,7 @@ void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32) { // MOV QWORD [+], dataImmU32 - if( memRegister == REG_R14 ) + if( memRegister == X86_REG_R14 ) { sint32 memImmS32 = (sint32)memImmU32; if( memImmS32 == 0 ) @@ -377,7 +377,7 @@ void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8) { // MOV BYTE [+], dataImmU8 - if( memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { sint32 memImmS32 = (sint32)memImmU32; if( memImmS32 >= -128 && memImmS32 <= 127 ) @@ -618,7 +618,7 @@ void _x64_op_reg64Low_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegist if (memRegister64 >= 8) x64Gen_writeU8(x64GenContext, 0x41); x64Gen_writeU8(x64GenContext, opByte); - _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, REG_NONE, memImmS32); + _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, X86_REG_NONE, memImmS32); } void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32) @@ -725,7 +725,7 @@ void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x05); @@ -765,7 +765,7 @@ void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x2D); @@ -804,7 +804,7 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis { // SUB , sint32 immS32 = (sint32)immU32; - if( memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { if( memImmS32 >= 128 ) { @@ -875,7 +875,7 @@ void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x15); @@ -893,7 +893,7 @@ void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uin { // DEC dword [+imm] sint32 memoryImmS32 = (sint32)memoryImmU32; - if (memoryRegister != REG_RSP) + if (memoryRegister != X86_REG_RSP) assert_dbg(); // not supported yet if (memoryImmS32 >= -128 && memoryImmS32 <= 127) { @@ -974,7 +974,7 @@ void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x25); @@ -1019,7 +1019,7 @@ void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegi sint32 immS32 = (sint32)immU32; if( srcRegister >= 8 ) x64Gen_writeU8(x64GenContext, 0x41); - if( 
srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0xA9); @@ -1045,7 +1045,7 @@ void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special RAX short form x64Gen_writeU8(x64GenContext, 0x3D); @@ -1075,7 +1075,7 @@ void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32) { // CMP , DWORD [+] - if( memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { if( memImmS32 >= -128 && memImmS32 <= 127 ) assert_dbg(); // todo -> Shorter instruction form @@ -1105,7 +1105,7 @@ void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegist } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x0D); @@ -1165,7 +1165,7 @@ void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x35); @@ -1381,7 +1381,7 @@ void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sin { // SETcc [+imm] sint32 memoryImmS32 = (sint32)memoryImmU32; - if( memoryRegister != REG_RSP ) + if( memoryRegister != X86_REG_RSP ) assert_dbg(); // not supported if( memoryRegister >= 8 ) assert_dbg(); // not supported @@ -1620,7 +1620,7 @@ void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint3 { // BT [+imm], bitIndex (bit test) sint32 memoryImmS32 = (sint32)memoryImmU32; - if( memoryRegister != REG_RSP ) + if( memoryRegister != X86_REG_RSP ) assert_dbg(); // not supported yet if( memoryImmS32 >= -128 && memoryImmS32 <= 127 ) { @@ -1655,7 +1655,7 @@ void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32) void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32) { - if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { assert_dbg(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp index 06f79b9c3..882820e29 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp @@ -42,7 +42,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe // SSE2 // move two doubles from memory into xmm register // MOVUPD , [+] - if( memRegister == REG_ESP ) + if( memRegister == X86_REG_ESP ) { // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range // 66 0F 10 84 E4 23 01 00 00 @@ -54,7 +54,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0xE4); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_NONE ) + else if( memRegister == X86_REG_NONE ) { assert_dbg(); //x64Gen_writeU8(x64GenContext, 0x66); @@ -74,7 +74,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe // SSE2 // move two doubles from memory into xmm register // MOVUPD [+], - if( memRegister == REG_ESP ) + if( memRegister == X86_REG_ESP ) { // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range x64Gen_writeU8(x64GenContext, 0x66); @@ -85,7 
+85,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0xE4); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_NONE ) + else if( memRegister == X86_REG_NONE ) { assert_dbg(); //x64Gen_writeU8(x64GenContext, 0x66); @@ -104,7 +104,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe { // SSE3 // move one double from memory into lower and upper half of a xmm register - if( memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { // MOVDDUP , [+] // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range @@ -117,7 +117,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0xE4); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_R15 ) + else if( memRegister == X86_REG_R15 ) { // MOVDDUP , [+] // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range @@ -129,7 +129,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_NONE ) + else if( memRegister == X86_REG_NONE ) { // MOVDDUP , [] // 36 F2 0F 12 05 - 00 00 00 00 @@ -183,7 +183,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi { // SSE2 // move lower 64bits (double) of xmm register to memory location - if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { // MOVSD [], // F2 0F 11 05 - 45 23 01 00 @@ -195,7 +195,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi //x64Gen_writeU8(x64GenContext, 0x05+xmmRegister*8); //x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_RSP ) + else if( memRegister == X86_REG_RSP ) { // MOVSD [RSP+], // F2 0F 11 84 24 - 33 22 11 00 @@ -217,7 +217,7 @@ void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmReg { // SSE3 // move one double from memory into lower half of a xmm register, leave upper half unchanged(?) - if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { // MOVLPD , [] //x64Gen_writeU8(x64GenContext, 0x66); @@ -227,7 +227,7 @@ void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmReg //x64Gen_writeU32(x64GenContext, memImmU32); assert_dbg(); } - else if( memRegister == REG_RSP ) + else if( memRegister == X86_REG_RSP ) { // MOVLPD , [+] // 66 0F 12 84 24 - 33 22 11 00 @@ -346,11 +346,11 @@ void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE2 - if (memRegister == REG_NONE) + if (memRegister == X86_REG_NONE) { assert_dbg(); } - else if (memRegister == REG_R14) + else if (memRegister == X86_REG_R14) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 
0x41 : 0x45); @@ -402,7 +402,7 @@ void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // compare bottom double with double from memory location - if( memoryReg == REG_R15 ) + if( memoryReg == X86_REG_R15 ) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); @@ -430,7 +430,7 @@ void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // compare bottom float with float from memory location - if (memoryReg == REG_R15) + if (memoryReg == X86_REG_R15) { x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); x64Gen_writeU8(x64GenContext, 0x0F); @@ -446,7 +446,7 @@ void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRe { // SSE2 // and xmm register with 128 bit value from memory - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false); x64Gen_writeU8(x64GenContext, 0x0F); @@ -462,7 +462,7 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // xor xmm register with 128 bit value from memory - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg? x64Gen_writeU8(x64GenContext, 0x0F); @@ -477,11 +477,11 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE2 - if (memRegister == REG_NONE) + if (memRegister == X86_REG_NONE) { assert_dbg(); } - else if (memRegister == REG_R14) + else if (memRegister == X86_REG_R14) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45); @@ -500,7 +500,7 @@ void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // and xmm register with 128 bit value from memory - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg? 
x64Gen_writeU8(x64GenContext, 0x0F); @@ -526,7 +526,7 @@ void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xm { // SSE2 // doubleword integer compare - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); @@ -608,7 +608,7 @@ void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xm { // SSE2 // converts two signed 32bit integers to two doubles - if( memReg == REG_RSP ) + if( memReg == X86_REG_RSP ) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false); @@ -682,7 +682,7 @@ void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE2 - if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { assert_dbg(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 838b61f50..b4fc62d09 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -274,18 +274,18 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) } IMLRegisterAllocatorParameters raParam; - raParam.physicalRegisterPool.SetAvailable(REG_RAX); - raParam.physicalRegisterPool.SetAvailable(REG_RDX); - raParam.physicalRegisterPool.SetAvailable(REG_RBX); - raParam.physicalRegisterPool.SetAvailable(REG_RBP); - raParam.physicalRegisterPool.SetAvailable(REG_RSI); - raParam.physicalRegisterPool.SetAvailable(REG_RDI); - raParam.physicalRegisterPool.SetAvailable(REG_R8); - raParam.physicalRegisterPool.SetAvailable(REG_R9); - raParam.physicalRegisterPool.SetAvailable(REG_R10); - raParam.physicalRegisterPool.SetAvailable(REG_R11); - raParam.physicalRegisterPool.SetAvailable(REG_R12); - raParam.physicalRegisterPool.SetAvailable(REG_RCX); + raParam.physicalRegisterPool.SetAvailable(X86_REG_RAX); + raParam.physicalRegisterPool.SetAvailable(X86_REG_RDX); + raParam.physicalRegisterPool.SetAvailable(X86_REG_RBX); + raParam.physicalRegisterPool.SetAvailable(X86_REG_RBP); + raParam.physicalRegisterPool.SetAvailable(X86_REG_RSI); + raParam.physicalRegisterPool.SetAvailable(X86_REG_RDI); + raParam.physicalRegisterPool.SetAvailable(X86_REG_R8); + raParam.physicalRegisterPool.SetAvailable(X86_REG_R9); + raParam.physicalRegisterPool.SetAvailable(X86_REG_R10); + raParam.physicalRegisterPool.SetAvailable(X86_REG_R11); + raParam.physicalRegisterPool.SetAvailable(X86_REG_R12); + raParam.physicalRegisterPool.SetAvailable(X86_REG_RCX); IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); From ac22a38d681e3ca5e9dd3783350530824a6ecaf7 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 17 Dec 2022 20:59:00 +0100 Subject: [PATCH 21/64] PPCRec: New x86-64 code emitter --- src/Cafe/CMakeLists.txt | 1 + .../Recompiler/BackendX64/x86Emitter.h | 1260 +++++++++++++++++ 2 files changed, 1261 insertions(+) create mode 100644 src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 0ced95c54..805328e32 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -91,6 +91,7 @@ add_library(CemuCafe HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp HW/Espresso/Recompiler/BackendX64/BackendX64.h 
HW/Espresso/Recompiler/BackendX64/X64Emit.hpp + HW/Espresso/Recompiler/BackendX64/x86Emitter.h HW/Latte/Common/RegisterSerializer.cpp HW/Latte/Common/RegisterSerializer.h HW/Latte/Common/ShaderSerializer.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h new file mode 100644 index 000000000..4c67797c7 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h @@ -0,0 +1,1260 @@ +#pragma once + +// x86-64 assembler/emitter +// auto generated. Do not edit this file manually + +typedef unsigned long long u64; +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; +typedef signed long long s64; +typedef signed int s32; +typedef signed short s16; +typedef signed char s8; + +enum X86Reg : sint8 +{ + X86_REG_NONE = -1, + X86_REG_EAX = 0, + X86_REG_ECX = 1, + X86_REG_EDX = 2, + X86_REG_EBX = 3, + X86_REG_ESP = 4, + X86_REG_EBP = 5, + X86_REG_ESI = 6, + X86_REG_EDI = 7, + X86_REG_R8D = 8, + X86_REG_R9D = 9, + X86_REG_R10D = 10, + X86_REG_R11D = 11, + X86_REG_R12D = 12, + X86_REG_R13D = 13, + X86_REG_R14D = 14, + X86_REG_R15D = 15, + X86_REG_RAX = 0, + X86_REG_RCX = 1, + X86_REG_RDX = 2, + X86_REG_RBX = 3, + X86_REG_RSP = 4, + X86_REG_RBP = 5, + X86_REG_RSI = 6, + X86_REG_RDI = 7, + X86_REG_R8 = 8, + X86_REG_R9 = 9, + X86_REG_R10 = 10, + X86_REG_R11 = 11, + X86_REG_R12 = 12, + X86_REG_R13 = 13, + X86_REG_R14 = 14, + X86_REG_R15 = 15 +}; + +enum X86Cond : u8 +{ + X86_CONDITION_O = 0, + X86_CONDITION_NO = 1, + X86_CONDITION_B = 2, + X86_CONDITION_NB = 3, + X86_CONDITION_Z = 4, + X86_CONDITION_NZ = 5, + X86_CONDITION_BE = 6, + X86_CONDITION_NBE = 7, + X86_CONDITION_S = 8, + X86_CONDITION_NS = 9, + X86_CONDITION_PE = 10, + X86_CONDITION_PO = 11, + X86_CONDITION_L = 12, + X86_CONDITION_NL = 13, + X86_CONDITION_LE = 14, + X86_CONDITION_NLE = 15 +}; +class x86Assembler64 +{ +private: + std::vector m_buffer; + +public: + u8* GetBufferPtr() { return m_buffer.data(); }; + std::span GetBuffer() { return m_buffer; }; + u32 GetWriteIndex() { return (u32)m_buffer.size(); }; + void _emitU8(u8 v) { m_buffer.emplace_back(v); }; + void _emitU16(u16 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 2); *(u16*)(m_buffer.data() + writeIdx) = v; }; + void _emitU32(u32 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 4); *(u32*)(m_buffer.data() + writeIdx) = v; }; + void _emitU64(u64 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 8); *(u64*)(m_buffer.data() + writeIdx) = v; }; + using GPR64 = X86Reg; + using GPR32 = X86Reg; + using GPR8_REX = X86Reg; + void ADD_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x00); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADD_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x00); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x02); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x01); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADD_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x01); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADD_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x01); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x01); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x03); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x03); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x30); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x30); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x32); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x31); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x31); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x31); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x31); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x33); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x33); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x38); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x38); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x3a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x39); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x39); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x39); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x39); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x3b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x3b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void CMP_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void CMP_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void CMP_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void CMP_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void CMP_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void CMP_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void CMP_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void TEST_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x84); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x84); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void TEST_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x85); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x85); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x85); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void TEST_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x85); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x88); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x88); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x8a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x89); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x89); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x89); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x89); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x8b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x8b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xb8 | ((dst) & 7)); + _emitU32((u32)imm); + } + void MOV_qi64(GPR64 dst, s64 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xb8 | ((dst) & 7)); + _emitU64((u64)imm); + } + void CALL_q(GPR64 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xff); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + } + void CALL_q_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xff); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void IMUL_ddi32(GPR32 dst, GPR32 src, s32 imm) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x69); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU32((u32)imm); + } + void IMUL_qqi32(GPR64 dst, GPR64 src, s32 imm) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x69); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU32((u32)imm); + } + void IMUL_ddi32_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x69); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void IMUL_qqi32_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x69); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void IMUL_ddi8(GPR32 dst, GPR32 src, s8 imm) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x6b); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU8((u8)imm); + } + void IMUL_qqi8(GPR64 dst, GPR64 src, s8 imm) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x6b); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU8((u8)imm); + } + void IMUL_ddi8_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x6b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void IMUL_qqi8_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x6b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void Jcc_j32(X86Cond cond, s32 imm) + { + _emitU8(0x0f); + _emitU8(0x80 | (u8)cond); + _emitU32((u32)imm); + } + void SETcc_b(X86Cond cond, GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0x90 | (u8)cond); + _emitU8((3 << 6) | (dst & 7)); + } + void SETcc_b_l(X86Cond cond, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0x90); + _emitU8((mod << 6) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } +}; From 91f972753e05c418534801a97ca412df34a7da0c Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 17 Dec 2022 21:06:27 +0100 Subject: [PATCH 22/64] PPCRec: New compare and cond jump instrs, update RA Storing the condition result in a register instead of imitating PPC CR lets us simplify the backend a lot. Only implemented as PoC for BDZ/BDNZ so far. 
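To illustrate the idea (not part of the actual patch): a minimal, self-contained C++ sketch of the new scheme, in which the compare materializes its result into an ordinary boolean register and the conditional jump only consumes that boolean, instead of emulating the PPC CR bits. The MiniCond/MiniState names below are invented for illustration; only the pattern (CMP + SETcc into a register, then TEST + Jcc on it, as emitted by PPCRecompilerX64Gen_imlInstruction_compare_s32 and _cjump2 in this commit) reflects the backend change.

#include <cstdint>
#include <cstdio>

// Hypothetical mini-IML used only for this sketch: condition results live in a
// plain boolean register rather than in emulated CR bits.
enum class MiniCond { EQ, NEQ };

struct MiniState
{
    uint32_t ctr = 0;        // loop counter (stand-in for PPC CTR)
    bool condResult = false; // boolean register written by the compare
};

// COMPARE_S32-style op: condResult = (regA <cond> imm).
// The x64 backend realizes this as XOR(result,result), CMP reg,imm, SETcc result.
static void miniCompareS32(MiniState& s, uint32_t regA, int32_t imm, MiniCond cond)
{
    s.condResult = (cond == MiniCond::EQ) ? ((int32_t)regA == imm) : ((int32_t)regA != imm);
}

// CONDITIONAL_JUMP-style op: branch taken if the boolean matches mustBeTrue.
// The x64 backend realizes this as TEST bool,bool followed by Jcc.
static bool miniCondJumpTaken(const MiniState& s, bool mustBeTrue)
{
    return s.condResult == mustBeTrue;
}

int main()
{
    MiniState s;
    s.ctr = 3;
    int iterations = 0;
    // BDNZ-style loop: decrement CTR with a plain SUB (no CR side effect),
    // then branch back while CTR != 0.
    do
    {
        iterations++;
        s.ctr -= 1;
        miniCompareS32(s, s.ctr, 0, MiniCond::NEQ); // condResult = (ctr != 0)
    } while (miniCondJumpTaken(s, true));
    printf("iterations=%d ctr=%u\n", iterations, (unsigned)s.ctr); // prints: iterations=3 ctr=0
    return 0;
}
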
--- .../Recompiler/BackendX64/BackendX64.cpp | 251 +++++++++++------- .../Recompiler/BackendX64/BackendX64.h | 87 +++--- .../Recompiler/BackendX64/BackendX64FPU.cpp | 104 ++++---- .../Recompiler/BackendX64/BackendX64Gen.cpp | 32 +-- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 2 +- .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 6 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 56 +++- .../Recompiler/IML/IMLInstruction.cpp | 48 +++- .../Espresso/Recompiler/IML/IMLInstruction.h | 115 ++++++-- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 4 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 62 +++-- .../HW/Espresso/Recompiler/IML/IMLSegment.cpp | 6 + .../HW/Espresso/Recompiler/IML/IMLSegment.h | 1 + .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 14 + .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 2 +- .../Recompiler/PPCRecompilerImlGen.cpp | 104 ++++---- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 12 +- src/config/LaunchSettings.cpp | 19 +- src/config/LaunchSettings.h | 7 + 19 files changed, 609 insertions(+), 323 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 79e1da8f9..849102941 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -8,13 +8,44 @@ #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" +static x86Assembler64::GPR32 _reg32(sint8 physRegId) +{ + return (x86Assembler64::GPR32)physRegId; +} + +static x86Assembler64::GPR8_REX _reg8(sint8 physRegId) +{ + return (x86Assembler64::GPR8_REX)physRegId; +} + +static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) +{ + return (x86Assembler64::GPR32)regId; +} + + +X86Cond _x86Cond(IMLCondition imlCond) +{ + switch (imlCond) + { + case IMLCondition::EQ: + return X86_CONDITION_Z; + case IMLCondition::NEQ: + return X86_CONDITION_NZ; + default: + break; + } + cemu_assert_suspicious(); + return X86_CONDITION_Z; +} + /* * Remember current instruction output offset for reloc * The instruction generated after this method has been called will be adjusted */ void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr) { - x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, extraInfo); + x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo); } /* @@ -37,7 +68,7 @@ void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunctio void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset) { - uint8* instructionData = x64GenContext->codeBuffer + jumpInstructionOffset; + uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset; if (instructionData[0] == 0x0F && (instructionData[1] >= 0x80 && instructionData[1] <= 0x8F)) { // far conditional jump @@ -241,7 +272,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R13, (uint64)memory_base); // check if cycles where decreased beyond zero, if yes -> leave recompiler x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, 
X86_CONDITION_NOT_CARRY, 0); //x64Gen_int3(x64GenContext); //x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, ppcAddress); @@ -254,7 +285,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, //// JMP [recompilerCallTable+EAX/4*8] //x64Gen_int3(x64GenContext); x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // check if instruction pointer was changed // assign new instruction pointer to EAX x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); @@ -537,7 +568,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // realRegisterMem now holds EA x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); - sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); // EA matches reservation // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) @@ -569,7 +600,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->emitter->GetWriteIndex()); } else return false; @@ -690,16 +721,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else { x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerA, imlInstruction->op_r_r.registerA); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32-1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + 
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) @@ -909,21 +940,21 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) - { - // registerResult -= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP) - { - // do nothing -> SUB is for BDNZ instruction - } - else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // update cr register - assert_dbg(); - } - x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); - } + //else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) + //{ + // // registerResult -= immS32 + // PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + // if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP) + // { + // // do nothing -> SUB is for BDNZ instruction + // } + // else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) + // { + // // update cr register + // assert_dbg(); + // } + // x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); + //} else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= immS32 @@ -1349,11 +1380,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandA); x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // OR ((~a+b+1)<1) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 @@ -1361,11 +1392,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, 
x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); // do subtraction if( rRegOperandB == rRegOperandA ) { @@ -1419,7 +1450,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, for (sint32 b = 0; b < 6; b++) { x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b)); - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set if (b == 5) { @@ -1432,7 +1463,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b)); } - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } @@ -1475,10 +1506,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, for(sint32 b=0; b<5; b++) { x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } @@ -1512,15 +1543,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000); - sint32 jumpInstructionJumpToSignedShift = x64GenContext->codeBufferIndex; + sint32 jumpInstructionJumpToSignedShift = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - //sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex; - //x64Gen_jmpc(x64GenContext, X86_CONDITION_EQUAL, 0); // unsigned shift (MSB of input register is not set) for(sint32 b=0; b<6; b++) { x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set if( b == 5 ) { @@ -1531,24 +1560,24 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } - sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex; + sint32 jumpInstructionJumpToEnd = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NONE, 0); // signed shift - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->codeBufferIndex); + 
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->emitter->GetWriteIndex()); for(sint32 b=0; b<6; b++) { // check if we need to shift by (1<codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set // set ca if any non-zero bit is shifted out x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<codeBufferIndex; + sint32 jumpInstructionJumpToAfterCa = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->emitter->GetWriteIndex()); // arithmetic shift if( b == 5 ) { @@ -1560,10 +1589,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->emitter->GetWriteIndex()); x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); // update CR if requested // todo @@ -1693,22 +1722,67 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, return true; } +bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + auto regR = _reg8(imlInstruction->op_compare.registerResult); + auto regA = _reg32(imlInstruction->op_compare.registerOperandA); + auto regB = _reg32(imlInstruction->op_compare.registerOperandB); + X86Cond cond = _x86Cond(imlInstruction->op_compare.cond); + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc + x64GenContext->emitter->CMP_dd(regA, regB); + x64GenContext->emitter->SETcc_b(cond, regR); + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + auto regR = _reg8(imlInstruction->op_compare_s32.registerResult); + auto regA = _reg32(imlInstruction->op_compare_s32.registerOperandA); + sint32 imm = imlInstruction->op_compare_s32.immS32; + X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond); + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc + x64GenContext->emitter->CMP_di32(regA, imm); + x64GenContext->emitter->SETcc_b(cond, regR); + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, 
IMLSegment* imlSegment) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + auto regBool = _reg8(imlInstruction->op_conditionalJump2.registerBool); + bool mustBeTrue = imlInstruction->op_conditionalJump2.mustBeTrue; + x64GenContext->emitter->TEST_bb(regBool, regBool); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->Jcc_j32(mustBeTrue ? X86_CONDITION_NZ : X86_CONDITION_Z, 0); + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + sint32 regResult = imlInstruction->op_r_r_s32.registerResult; + sint32 regOperand = imlInstruction->op_r_r_s32.registerA; + uint32 immS32 = imlInstruction->op_r_r_s32.immS32; + if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { - // registerResult = registerOperand + immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); + if(regResult != regOperand) + x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); + x64Gen_add_reg64Low32_imm32(x64GenContext, regResult, (uint32)immU32); + } + else if (imlInstruction->operation == PPCREC_IML_OP_SUB) + { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + if (regResult != regOperand) + x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); + x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY ) { @@ -1733,9 +1807,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction assert_dbg(); } sint32 crRegister = imlInstruction->crRegister; - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } } @@ -1761,11 +1832,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_not_reg64Low32(x64GenContext, rRegOperand); x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperand); x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset 
carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // OR ((~a+b+1)<1) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 @@ -1773,11 +1844,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); // do actual computation of value, note: a - b is equivalent to a + ~b + 1 x64Gen_not_reg64Low32(x64GenContext, rRegResult); x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immS32 + 1); @@ -1839,12 +1910,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // SAR registerResult, SH x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh); // JNS (if sign not set) - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here // MOV BYTE [ESP+xer_ca], 0 x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // jump destination - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); // CR update if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { @@ -2147,9 +2218,6 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size) bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { x64GenContext_t x64GenContext{}; - x64GenContext.codeBufferSize = 1024; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // generate iml instruction code @@ -2157,7 +2225,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { x64GenContext.currentSegment = segIt; - segIt->x64Offset = x64GenContext.codeBufferIndex; + segIt->x64Offset = x64GenContext.emitter->GetWriteIndex(); for(size_t i=0; iimlList.size(); i++) { IMLInstruction* imlInstruction = segIt->imlList.data() + i; @@ -2198,9 +2266,24 @@ bool 
PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R ) + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + { + codeGenerationFailed = true; + } + } + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE) + { + PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + { + PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - if( PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) + if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) { codeGenerationFailed = true; } @@ -2324,11 +2407,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo // handle failed code generation if( codeGenerationFailed ) { - free(x64GenContext.codeBuffer); return false; } // allocate executable memory - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); size_t baseAddress = (size_t)executableMemory; // fix relocs for(auto& relocIt : x64GenContext.relocateOffsetTable2) @@ -2341,7 +2423,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo x64Offset = destSegment->x64Offset; uint32 relocBase = relocIt.offset; - uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; + uint8* relocInstruction = x64GenContext.emitter->GetBufferPtr()+relocBase; if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) { // Jcc relativeImm32 @@ -2374,21 +2456,17 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo } // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); - x64GenContext.codeBuffer = nullptr; + std::span codeBuffer = x64GenContext.emitter->GetBuffer(); + memcpy(executableMemory, codeBuffer.data(), codeBuffer.size_bytes()); // set code PPCRecFunction->x86Code = executableMemory; - PPCRecFunction->x86Size = x64GenContext.codeBufferIndex; + PPCRecFunction->x86Size = codeBuffer.size_bytes(); return true; } void PPCRecompilerX64Gen_generateEnterRecompilerCode() { x64GenContext_t x64GenContext{}; - x64GenContext.codeBufferSize = 1024; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // start of recompiler entry function @@ -2419,7 +2497,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() x64Gen_writeU8(&x64GenContext, 0x83); x64Gen_writeU8(&x64GenContext, 0x04); x64Gen_writeU8(&x64GenContext, 0x24); - uint32 jmpPatchOffset = x64GenContext.codeBufferIndex; + uint32 jmpPatchOffset = x64GenContext.emitter->GetWriteIndex(); x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, 
offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP); @@ -2434,7 +2512,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() //JMP recFunc x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1 - x64GenContext.codeBuffer[jmpPatchOffset] = (x64GenContext.codeBufferIndex-(jmpPatchOffset-4)); + x64GenContext.emitter->GetBuffer()[jmpPatchOffset] = (x64GenContext.emitter->GetWriteIndex() -(jmpPatchOffset-4)); //recompilerExit1: x64Gen_pop_reg64(&x64GenContext, X86_REG_R15); @@ -2455,10 +2533,9 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() // RET x64Gen_ret(&x64GenContext); - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); + memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes()); PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory; } @@ -2466,9 +2543,6 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() { x64GenContext_t x64GenContext{}; - x64GenContext.codeBufferSize = 128; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // update instruction pointer @@ -2481,10 +2555,9 @@ void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() // RET x64Gen_ret(&x64GenContext); - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); + memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes()); return executableMemory; } diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 4cbf01a9f..eefd9da36 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -1,6 +1,8 @@ #include "../PPCRecompiler.h" // todo - get rid of dependency +#include "x86Emitter.h" + struct x64RelocEntry_t { x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {}; @@ -12,10 +14,18 @@ struct x64RelocEntry_t struct x64GenContext_t { IMLSegment* currentSegment{}; + x86Assembler64* emitter; + + x64GenContext_t() + { + emitter = new x86Assembler64(); + } + + ~x64GenContext_t() + { + delete emitter; + } - uint8* codeBuffer{}; - sint32 codeBufferIndex{}; - sint32 codeBufferSize{}; // cr state sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned) @@ -24,41 +34,41 @@ struct x64GenContext_t }; // todo - these definitions are part of the x86_64 emitter. Not the backend itself. 
We should move them eventually -#define X86_REG_EAX 0 -#define X86_REG_ECX 1 -#define X86_REG_EDX 2 -#define X86_REG_EBX 3 -#define X86_REG_ESP 4 // reserved for low half of hCPU pointer -#define X86_REG_EBP 5 -#define X86_REG_ESI 6 -#define X86_REG_EDI 7 -#define X86_REG_NONE -1 - -#define X86_REG_RAX 0 -#define X86_REG_RCX 1 -#define X86_REG_RDX 2 -#define X86_REG_RBX 3 -#define X86_REG_RSP 4 // reserved for hCPU pointer -#define X86_REG_RBP 5 -#define X86_REG_RSI 6 -#define X86_REG_RDI 7 -#define X86_REG_R8 8 -#define X86_REG_R9 9 -#define X86_REG_R10 10 -#define X86_REG_R11 11 -#define X86_REG_R12 12 -#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet) -#define X86_REG_R14 14 // reserved as temporary register -#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData - -#define X86_REG_AL 0 -#define X86_REG_CL 1 -#define X86_REG_DL 2 -#define X86_REG_BL 3 -#define X86_REG_AH 4 -#define X86_REG_CH 5 -#define X86_REG_DH 6 -#define X86_REG_BH 7 +//#define X86_REG_EAX 0 +//#define X86_REG_ECX 1 +//#define X86_REG_EDX 2 +//#define X86_REG_EBX 3 +//#define X86_REG_ESP 4 // reserved for low half of hCPU pointer +//#define X86_REG_EBP 5 +//#define X86_REG_ESI 6 +//#define X86_REG_EDI 7 +//#define X86_REG_NONE -1 +// +//#define X86_REG_RAX 0 +//#define X86_REG_RCX 1 +//#define X86_REG_RDX 2 +//#define X86_REG_RBX 3 +//#define X86_REG_RSP 4 // reserved for hCPU pointer +//#define X86_REG_RBP 5 +//#define X86_REG_RSI 6 +//#define X86_REG_RDI 7 +//#define X86_REG_R8 8 +//#define X86_REG_R9 9 +//#define X86_REG_R10 10 +//#define X86_REG_R11 11 +//#define X86_REG_R12 12 +//#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet) +//#define X86_REG_R14 14 // reserved as temporary register +//#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData +// +//#define X86_REG_AL 0 +//#define X86_REG_CL 1 +//#define X86_REG_DL 2 +//#define X86_REG_BL 3 +//#define X86_REG_AH 4 -> Adressable via non-REX only +//#define X86_REG_CH 5 +//#define X86_REG_DH 6 +//#define X86_REG_BH 7 // reserved registers #define REG_RESV_TEMP (X86_REG_R14) @@ -72,6 +82,7 @@ struct x64GenContext_t #define reg32ToReg16(__x) (__x) +// deprecated condition flags enum { X86_CONDITION_EQUAL, // or zero diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index ed2fb7d9d..c7e11d42a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -217,16 +217,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // jump cases x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, 
X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); // default case -> float @@ -237,31 +237,31 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen uint32 jumpOffset_endOfS8; PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->codeBufferIndex; + jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); } // load from memory @@ -495,16 +495,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext } // max(i, -clampMin) x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0); x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // min(i, clampMax) x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0); x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); // endian swap if( bitWriteSize == 16) x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); @@ -528,16 +528,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // jump cases x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // 
type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); // default case -> float @@ -548,31 +548,31 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe uint32 jumpOffset_endOfS8; PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->codeBufferIndex; + jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); } // store to memory @@ -873,18 +873,18 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction sint32 crRegister = imlInstruction->crRegister; // if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN) x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) { @@ -1102,50 +1102,50 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc { cemu_assert_debug(imlInstruction->crRegister == 
PPC_REC_INVALID_REGISTER); x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // select bottom x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_bottom = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C bottom x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - sint32 jumpInstructionOffset2_bottom = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B bottom - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex()); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex()); // select top x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); // copy top to bottom (todo: May cause stall?) 
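// [Editorial aside, not part of the patch] Every hunk in this file follows the same
// forward-branch patching idiom; the change here is only that the emission offset now
// comes from emitter->GetWriteIndex() instead of the old codeBufferIndex field. The
// idiom: remember where the conditional jump is written, emit it with a zero
// displacement, emit the fall-through code, then back-patch the displacement once the
// target offset is known. A minimal sketch using the functions visible in these hunks
// (the helper name and the emitBody callback are hypothetical):
static void emitSkipIfEqual(x64GenContext_t* x64GenContext, void (*emitBody)(x64GenContext_t*))
{
	// remember where the placeholder jump will be written
	sint32 jumpOffset_skip = x64GenContext->emitter->GetWriteIndex();
	x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); // displacement patched below
	emitBody(x64GenContext); // code that runs only when the condition is not met
	// back-patch the placeholder so it jumps to the current write position
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_skip, x64GenContext->emitter->GetWriteIndex());
}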
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_top = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C top //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC, 2); - sint32 jumpInstructionOffset2_top = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B top - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex()); //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB, 2); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex()); } else assert_dbg(); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 1094693a9..bc5f5f6ce 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -7,46 +7,22 @@ void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v) { - if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint8*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex++; + x64GenContext->emitter->_emitU8(v); } void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v) { - if( x64GenContext->codeBufferIndex+2 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint16*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 2; + x64GenContext->emitter->_emitU16(v); } void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v) { - if( x64GenContext->codeBufferIndex+4 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint32*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 4; + x64GenContext->emitter->_emitU32(v); } void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v) { - if( x64GenContext->codeBufferIndex+8 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - 
x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint64*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 8; + x64GenContext->emitter->_emitU64(v); } #include "x64Emit.hpp" diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 3dcd50b6f..650946f33 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -26,4 +26,4 @@ void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* // debug void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); -void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index 160554d6f..fae49541c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -55,7 +55,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) } /* -* Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister) +* Returns true if the instruction can overwrite CR (depending on value of ->crRegister) */ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) { @@ -63,6 +63,10 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) return true; + if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + return true; // ?? + if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + return true; // ?? 
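// [Editorial aside, not part of the patch] PPCREC_IML_TYPE_COMPARE, COMPARE_S32 and
// CONDITIONAL_JUMP are the "new style" of condition handling this series introduces:
// the compare writes a boolean result into a register and the jump consumes that
// boolean, instead of routing the condition through CR flags. Using the constructors
// and helpers added further down in this series (see the IMLInstruction.h and BC
// hunks), a "branch if gprReg == 0" would be emitted roughly like this; gprReg is a
// stand-in for an already-loaded GPR:
uint32 tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1);
ppcImlGenContext->emitInst().make_compare_s32(gprReg, 0, tmpBoolReg, IMLCondition::EQ);
ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true);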
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 008c2fadb..72f706d96 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -121,6 +121,20 @@ std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg) return ""; } +std::string IMLDebug_GetConditionName(IMLCondition cond) +{ + switch (cond) + { + case IMLCondition::EQ: + return "EQ"; + case IMLCondition::NEQ: + return "NEQ"; + default: + cemu_assert_unimplemented(); + } + return "ukn"; +} + void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); @@ -143,9 +157,12 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool if (printLivenessRangeInfo) { + strOutput.reset(); IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); + debug_printf("%s\n", strOutput.c_str()); } //debug_printf("\n"); + strOutput.reset(); sint32 lineOffsetParameters = 18; @@ -207,6 +224,37 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt(" -> CR{}", inst.crRegister); } } + else if (inst.type == PPCREC_IML_TYPE_COMPARE) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandB); + strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerResult, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerOperandA); + strOutput.addFmt("{}", inst.op_compare_s32.immS32); + strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerResult, true); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + strOutput.add("CJUMP2 "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true); + if(!inst.op_conditionalJump2.mustBeTrue) + strOutput.add("(inverted)"); + } else if (inst.type == PPCREC_IML_TYPE_R_R_S32) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); @@ -369,7 +417,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else strOutput.add("U"); strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - strOutput.addFmt("= fpr_t{} mode {}\n", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); + strOutput.addFmt(" = fpr_t{} mode {}", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { @@ -388,7 +436,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - strOutput.addFmt("CYCLE_CHECK\n"); + strOutput.addFmt("CYCLE_CHECK"); } else if (inst.type == 
PPCREC_IML_TYPE_CONDITIONAL_R_S32) { @@ -460,11 +508,11 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool debug_printf("\n"); } -void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext) +void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo) { for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { - IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], false); + IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo); debug_printf("\n"); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index f471c827a..2084d1680 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -141,6 +141,21 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else cemu_assert_unimplemented(); } + else if (type == PPCREC_IML_TYPE_COMPARE) + { + registersUsed->readNamedReg1 = op_compare.registerOperandA; + registersUsed->readNamedReg2 = op_compare.registerOperandB; + registersUsed->writtenNamedReg1 = op_compare.registerResult; + } + else if (type == PPCREC_IML_TYPE_COMPARE_S32) + { + registersUsed->readNamedReg1 = op_compare_s32.registerOperandA; + registersUsed->writtenNamedReg1 = op_compare_s32.registerResult; + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + registersUsed->readNamedReg1 = op_conditionalJump2.registerBool; + } else if (type == PPCREC_IML_TYPE_LOAD) { registersUsed->writtenNamedReg1 = op_storeLoad.registerData; @@ -455,17 +470,30 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste } else if (type == PPCREC_IML_TYPE_R_R_S32) { - // in all cases result is written and other operand is read only op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_R_R_R) { - // in all cases result is written and other operands are read only op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced); } + else if (type == PPCREC_IML_TYPE_COMPARE) + { + op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, gprRegisterSearched, gprRegisterReplaced); + op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, gprRegisterSearched, gprRegisterReplaced); + op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_COMPARE_S32) + { + op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); + op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, gprRegisterSearched, gprRegisterReplaced); + } else if (type == PPCREC_IML_TYPE_CJUMP 
|| type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // no effect on registers @@ -627,13 +655,17 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + // not affected + } else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_NO_OP) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_MACRO) { @@ -737,13 +769,17 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + // not affected + } else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_NO_OP) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_MACRO) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 89f14af42..a9245baad 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -4,8 +4,6 @@ enum { PPCREC_IML_OP_ASSIGN, // '=' operator PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap - PPCREC_IML_OP_ADD, // '+' operator - PPCREC_IML_OP_SUB, // '-' operator PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr) PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr) @@ -85,8 +83,19 @@ enum // PS PPCREC_IML_OP_FPR_SUM0, PPCREC_IML_OP_FPR_SUM1, -}; + + + // working towards defining ops per-form + // R_R_R only + + // R_R_S32 only + + // R_R_R + R_R_S32 + PPCREC_IML_OP_ADD, + PPCREC_IML_OP_SUB, + +}; #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) enum @@ -118,6 +127,19 @@ enum }; +enum class IMLCondition : uint8 +{ + EQ, + NEQ, + SIGNED_GT, + SIGNED_LT, + UNSIGNED_GT, + UNSIGNED_LT, + + SIGNED_OVERFLOW, + SIGNED_NOVERFLOW, +}; + enum { PPCREC_CR_MODE_COMPARE_SIGNED, @@ -131,7 +153,7 @@ enum { PPCREC_IML_TYPE_NONE, PPCREC_IML_TYPE_NO_OP, // no-op instruction - PPCREC_IML_TYPE_R_R, // r* (op) *r + PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r) PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] @@ -145,6 +167,12 @@ enum PPCREC_IML_TYPE_CJUMP, // conditional jump PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) + + // new style of handling conditions and branches: + PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* + PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm + PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. 
Jump condition is based on boolean register + // conditional PPCREC_IML_TYPE_CONDITIONAL_R_S32, // FPR @@ -294,11 +322,6 @@ struct IMLInstruction sint32 immS32; }op_r_immS32; struct - { - uint32 address; - uint8 flags; - }op_jumpmark; - struct { uint32 param; uint32 param2; @@ -310,7 +333,7 @@ struct IMLInstruction uint8 crRegisterIndex; uint8 crBitIndex; bool bitMustBeSet; - }op_conditionalJump; + }op_conditionalJump; // legacy jump struct { uint8 registerData; @@ -353,16 +376,30 @@ struct IMLInstruction uint8 registerResult; }op_fpr_r; struct - { - uint32 ppcAddress; - uint32 x64Offset; - }op_ppcEnter; - struct { uint8 crD; // crBitIndex (result) uint8 crA; // crBitIndex uint8 crB; // crBitIndex }op_cr; + struct + { + uint8 registerResult; // stores the boolean result of the comparison + uint8 registerOperandA; + uint8 registerOperandB; + IMLCondition cond; + }op_compare; + struct + { + uint8 registerResult; // stores the boolean result of the comparison + uint8 registerOperandA; + sint32 immS32; + IMLCondition cond; + }op_compare_s32; + struct + { + uint8 registerBool; + bool mustBeTrue; + }op_conditionalJump2; // conditional operations (emitted if supported by target platform) struct { @@ -385,7 +422,8 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || type == PPCREC_IML_TYPE_CJUMP || - type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || + type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) return true; return false; } @@ -432,6 +470,17 @@ struct IMLInstruction this->op_r_r.registerA = registerA; } + + void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint32 crMode = 0) + { + this->type = PPCREC_IML_TYPE_R_S32; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_immS32.registerIndex = registerIndex; + this->op_r_immS32.immS32 = immS32; + } + void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) { // operation with three register operands (e.g. 
"t0 = t1 + t4") @@ -456,6 +505,40 @@ struct IMLInstruction this->op_r_r_s32.immS32 = immS32; } + void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_COMPARE; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + this->op_compare.registerResult = registerResult; + this->op_compare.registerOperandA = registerA; + this->op_compare.registerOperandB = registerB; + this->op_compare.cond = cond; + } + + void make_compare_s32(uint8 registerA, sint32 immS32, uint8 registerResult, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_COMPARE_S32; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + this->op_compare_s32.registerResult = registerResult; + this->op_compare_s32.registerOperandA = registerA; + this->op_compare_s32.immS32 = immS32; + this->op_compare_s32.cond = cond; + } + + void make_conditional_jump_new(uint8 registerBool, bool mustBeTrue) + { + this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + this->op_conditionalJump2.registerBool = registerBool; + this->op_conditionalJump2.mustBeTrue = mustBeTrue; + } + // load from memory void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index ae3c6c799..2cbcb0c1d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -835,9 +835,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp } if (foundMatch) { - // insert expand instruction - IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, i); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex); + PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 1b720d26d..8c7c807d4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -218,10 +218,20 @@ typedef struct sint32 liveRangesCount; }raLiveRangeInfo_t; +bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB) +{ + if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index) + return true; + if ((rangeA->start.index == RA_INTER_RANGE_START && rangeA->start.index == rangeB->start.index)) + return true; + if (rangeA->end.index == RA_INTER_RANGE_END && rangeA->end.index == rangeB->end.index) + return true; + return false; +} + // mark occupied registers by any overlapping range as unavailable in physRegSet void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet) { - //uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1; for (auto& subrange : range->list_subranges) { IMLSegment* imlSegment = subrange->imlSegment; @@ -235,9 +245,10 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IML continue; } - if (subrange->start.index < subrangeItr->end.index && subrange->end.index > 
subrangeItr->start.index || - (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || - (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) + //if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index || + // (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || + // (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) + if(IsRangeOverlapping(subrange, subrangeItr)) { if (subrangeItr->range->physicalRegister >= 0) physRegSet.SetReserved(subrangeItr->range->physicalRegister); @@ -272,19 +283,6 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) } // sort std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare); - //for (sint32 i1 = 0; i1 < count; i1++) - //{ - // for (sint32 i2 = i1+1; i2 < count; i2++) - // { - // if (subrangeList[i1]->start.index > subrangeList[i2]->start.index) - // { - // // swap - // raLivenessSubrange_t* temp = subrangeList[i1]; - // subrangeList[i1] = subrangeList[i2]; - // subrangeList[i2] = temp; - // } - // } - //} // reassemble linked list subrangeList[count] = nullptr; imlSegment->raInfo.linkedList_allSubranges = subrangeList[0]; @@ -478,6 +476,7 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen } } // evaluate strategy: Split current range to fit in available holes + // todo - are checks required to avoid splitting on the suffix instruction? spillStrategies.availableRegisterHole.cost = INT_MAX; spillStrategies.availableRegisterHole.distance = -1; spillStrategies.availableRegisterHole.physRegister = -1; @@ -770,6 +769,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, liveInfo.liveRangesCount = 0; sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; + //sint32 suffixInstructionIndex = imlSegment->imlList.size() - suffixInstructionCount; // if no suffix instruction exists this matches instruction count // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) @@ -806,7 +806,8 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1) assert_dbg(); virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - // store GPR + // store GPR if required + // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (liverange->hasStore) { PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name); @@ -827,6 +828,13 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; liveInfo.liveRangesCount++; // load GPR + // similar to stores, any loads for the next segment need to happen before the suffix instruction + // however, starting 17-12-2022 ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. 
RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange) + // the limitation that name loads (for the follow-up segments) need to happen before the suffix instruction require that the range also reflects this, otherwise the RA would erroneously assume registers to be available during the suffix instruction + if (imlSegment->HasSuffixInstruction()) + { + cemu_assert_debug(subrangeItr->start.index <= imlSegment->GetSuffixInstructionIndex()); + } if (subrangeItr->_noLoad == false) { PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name); @@ -839,7 +847,8 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, } subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } - // replace registers + // rewrite registers + // todo - this can be simplified by using a map or lookup table rather than a check + 4 slot translation table if (index < imlSegment->imlList.size()) { IMLUsedRegisters gprTracking; @@ -1004,7 +1013,6 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_assignRegisters(ctx, ppcImlGenContext); PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext); @@ -1095,6 +1103,15 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); } } + // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction + // this is due to range load instructions being inserted before the suffix instruction + if (subrange->end.index == RA_INTER_RANGE_END) + { + if (imlSegment->HasSuffixInstruction()) + { + cemu_assert_debug(subrange->start.index <= imlSegment->GetSuffixInstructionIndex()); + } + } return subrange; } @@ -1155,7 +1172,10 @@ void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, IM { if (_isRangeDefined(imlSegment, vGPR) == false) { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; + if(imlSegment->HasSuffixInstruction()) + imlSegment->raDistances.reg[vGPR].usageStart = imlSegment->GetSuffixInstructionIndex(); + else + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; return; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp index 2b2c56a21..f3b6834f0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ -16,6 +16,12 @@ bool IMLSegment::HasSuffixInstruction() const return imlInstruction.IsSuffixInstruction(); } +sint32 IMLSegment::GetSuffixInstructionIndex() const +{ + cemu_assert_debug(HasSuffixInstruction()); + return (sint32)(imlList.size() - 1); +} + IMLInstruction* IMLSegment::GetLastInstruction() { if (imlList.empty()) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 7ea7903bc..add7098ed 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -135,6 +135,7 @@ struct IMLSegment IMLInstruction* AppendInstruction(); bool HasSuffixInstruction() const; + sint32 GetSuffixInstructionIndex() const; IMLInstruction* GetLastInstruction(); // segment points diff 
--git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index b4fc62d09..ed8bee876 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -167,6 +167,20 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + + uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr(); + uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr(); + + if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0) + { + + if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr) + { + delete ppcRecFunc; + return nullptr; + } + } + // apply passes if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext)) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 4843fd1cc..0a5bd8abd 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -28,7 +28,7 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe // IML instruction generation void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode); +void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode); void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 40844bb22..c5ec96c88 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -58,15 +58,15 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) { - // operation with two register operands (e.g. 
"t0 = t1") - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r.registerResult = registerResult; - imlInstruction->op_r_r.registerA = registerA; + if (imlInstruction) + __debugbreak(); // not supported + + ppcImlGenContext->emitInst().make_r_r(operation, registerResult, registerA, crRegister, crMode); +} + +void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode) +{ + ppcImlGenContext->emitInst().make_r_s32(operation, registerIndex, immS32, crRegister, crMode); } void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) @@ -89,20 +89,6 @@ void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcIm imlInstruction->op_r_name.name = name; } -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode) -{ - // two variations: - // operation without store (e.g. "'r3' < 123" which has no effect other than updating a condition flags register) - // operation with store (e.g. "'r3' = 123") - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_S32; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_immS32.registerIndex = registerIndex; - imlInstruction->op_r_immS32.immS32 = immS32; -} - void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { if(imlInstruction == NULL) @@ -292,6 +278,13 @@ uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenCo return registerIndex; } +// get throw-away register. 
Only valid for the scope of a single translated instruction +// be careful to not collide with manually loaded temporary register +uint32 PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext_t* ppcImlGenContext, uint32 temporaryIndex) +{ + return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + temporaryIndex); +} + /* * Loads a PPC fpr into any of the available IML FPU registers * If loadNew is false, it will check first if the fpr is already loaded into any IML register @@ -407,7 +400,7 @@ bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rD, rA, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, PPC_REC_INVALID_REGISTER, 0); return true; } @@ -417,7 +410,7 @@ bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 crMask; PPC_OPC_TEMPL_XFX(opcode, rS, crMask); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, PPC_REC_INVALID_REGISTER, 0); return true; } @@ -453,7 +446,7 @@ void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 b = imm; // load gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_SIGNED); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED); } void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -466,7 +459,7 @@ void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 b = imm; // load gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); } bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) @@ -628,11 +621,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( ignoreCondition == false ) return false; // not supported for the moment uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC); - if( decrementerMustBeZero ) - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_E, 
PPCREC_CR_REG_TEMP, 0, false); - else - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); + uint32 tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1); + ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? IMLCondition::EQ : IMLCondition::NEQ); + ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true); return true; } else @@ -709,7 +701,7 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco branchDestReg = tmpRegister; } uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, PPC_REC_INVALID_REGISTER, 0); } if (!BO.decrementerIgnore()) @@ -901,7 +893,7 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // rA not used, instruction is value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; @@ -924,7 +916,7 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // rA not used, instruction turns into simple value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; @@ -1170,15 +1162,15 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc if( registerRA != registerRS ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRA, registerRS); if( SH != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, PPC_REC_INVALID_REGISTER, 0); if(opcode&PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, PPCREC_CR_MODE_LOGICAL); } else { if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, 
PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0); } return true; } @@ -1213,12 +1205,12 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); if (opcode & PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, PPCREC_CR_MODE_LOGICAL); } else { if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0); } return true; } @@ -1438,7 +1430,7 @@ void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1482,7 +1474,7 @@ void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1527,7 +1519,7 @@ void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1571,7 +1563,7 @@ void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* 
ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1880,12 +1872,12 @@ void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1924,12 +1916,12 @@ void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1968,12 +1960,12 @@ void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, 
(sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // store byte PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // generic indexed store (STWX, STHX, STBX, STWUX. If bitReversed == true -> STHBRX) @@ -2481,7 +2473,7 @@ void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL); } void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2496,7 +2488,7 @@ void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL); } bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2623,7 +2615,7 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2639,7 +2631,7 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2655,7 +2647,7 @@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( gprDestReg 
!= gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2671,7 +2663,7 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index fddc5293b..da53ea556 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -163,7 +163,7 @@ bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) @@ -258,7 +258,7 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // emit load iml @@ -326,7 +326,7 @@ bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 
fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); @@ -412,7 +412,7 @@ bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); @@ -1114,7 +1114,7 @@ bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired load @@ -1165,7 +1165,7 @@ bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 op // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired store diff --git a/src/config/LaunchSettings.cpp b/src/config/LaunchSettings.cpp index bf38b9cf2..53ab5e19b 100644 --- a/src/config/LaunchSettings.cpp +++ b/src/config/LaunchSettings.cpp @@ -13,6 +13,7 @@ #include "util/crypto/aes128.h" #include "Cafe/Filesystem/FST/FST.h" +#include "util/helpers/StringHelpers.h" void requireConsole(); @@ -74,7 +75,9 @@ bool LaunchSettings::HandleCommandline(const std::vector& args) po::options_description hidden{ "Hidden options" }; hidden.add_options() ("nsight", po::value()->implicit_value(true), "NSight debugging options") - ("legacy", po::value()->implicit_value(true), "Intel legacy graphic mode"); + ("legacy", po::value()->implicit_value(true), "Intel legacy graphic mode") + ("ppcrec-lower-addr", po::value(), "For debugging: Lower address allowed for PPC recompilation") + ("ppcrec-upper-addr", po::value(), "For debugging: Upper address allowed for PPC recompilation"); po::options_description extractor{ "Extractor tool" }; extractor.add_options() @@ -186,6 +189,20 @@ bool LaunchSettings::HandleCommandline(const std::vector& args) if (vm.count("output")) log_path = vm["output"].as(); + // recompiler range limit for debugging + if (vm.count("ppcrec-lower-addr")) + { + uint32 addr = (uint32)StringHelpers::ToInt64(vm["ppcrec-lower-addr"].as()); + ppcRec_limitLowerAddr 
= addr; + } + if (vm.count("ppcrec-upper-addr")) + { + uint32 addr = (uint32)StringHelpers::ToInt64(vm["ppcrec-upper-addr"].as()); + ppcRec_limitUpperAddr = addr; + } + if(ppcRec_limitLowerAddr != 0 && ppcRec_limitUpperAddr != 0) + cemuLog_log(LogType::Force, "PPCRec range limited to 0x{:08x}-0x{:08x}", ppcRec_limitLowerAddr, ppcRec_limitUpperAddr); + if(!extract_path.empty()) { ExtractorTool(extract_path, output_path, log_path); diff --git a/src/config/LaunchSettings.h b/src/config/LaunchSettings.h index b0f673a13..074fbb91b 100644 --- a/src/config/LaunchSettings.h +++ b/src/config/LaunchSettings.h @@ -29,6 +29,9 @@ class LaunchSettings static std::optional GetPersistentId() { return s_persistent_id; } + static uint32 GetPPCRecLowerAddr() { return ppcRec_limitLowerAddr; }; + static uint32 GetPPCRecUpperAddr() { return ppcRec_limitUpperAddr; }; + private: inline static std::optional s_load_game_file{}; inline static std::optional s_load_title_id{}; @@ -44,6 +47,10 @@ class LaunchSettings inline static std::optional s_persistent_id{}; + // for recompiler debugging + inline static uint32 ppcRec_limitLowerAddr{}; + inline static uint32 ppcRec_limitUpperAddr{}; + static bool ExtractorTool(std::wstring_view wud_path, std::string_view output_path, std::wstring_view log_path); }; From 2535cf43d83ca72846d5ac2b4a32340f32be1bb5 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 19 Dec 2022 00:08:10 +0100 Subject: [PATCH 23/64] PPCRec: Streamline instructions + unify code for CR updates --- .../Recompiler/BackendX64/BackendX64.cpp | 121 +-- .../Recompiler/IML/IMLInstruction.cpp | 5 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 12 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 5 - .../Recompiler/PPCRecompilerImlGen.cpp | 763 +++++++----------- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 18 +- 6 files changed, 336 insertions(+), 588 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 849102941..461531f98 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -614,7 +614,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // registerResult = registerA if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + if(imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) { // since MOV doesn't set eflags we need another test instruction @@ -634,39 +635,25 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) { - // registerResult = endianSwap32(registerA) if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) assert_dbg(); x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult); } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) + else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { - // registerResult += registerA - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == 
PPC_REC_INVALID_REGISTER); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } - else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) + else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) - { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation\n"); - assert_dbg(); - } - } + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); } else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_OP_OR ) { @@ -683,33 +670,28 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // registerResult ^= registerA x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } - // NOT destination register x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); - // update cr bits - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // NOT instruction does not update flags, so we have to generate an additional TEST instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } + } + else if (imlInstruction->operation == PPCREC_IML_OP_NEG) + { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); + // copy register content if different registers + 
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // count leading zeros PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); @@ -777,23 +759,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else assert_dbg(); } - else if( imlInstruction->operation == PPCREC_IML_OP_NEG ) - { - // copy register content if different registers - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } - // NEG destination register - x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); - // update cr bits - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); @@ -872,26 +837,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } } } - else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 ) - { - // registerResult = (uint32)(sint32)(sint16)registerA - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) - { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation\n"); - assert_dbg(); - } - } - } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); @@ -930,31 +875,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) - { - // registerResult += immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, 
(uint32)imlInstruction->op_r_immS32.immS32); - } - //else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) - //{ - // // registerResult -= immS32 - // PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP) - // { - // // do nothing -> SUB is for BDNZ instruction - // } - // else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - // { - // // update cr register - // assert_dbg(); - // } - // x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); - //} else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= immS32 @@ -2568,3 +2488,4 @@ void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions() PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited); } + diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 2084d1680..d4cfdcb12 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -35,8 +35,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_OR || operation == PPCREC_IML_OP_AND || operation == PPCREC_IML_OP_XOR || - operation == PPCREC_IML_OP_ADD || - operation == PPCREC_IML_OP_ADD_CARRY || + operation == PPCREC_IML_OP_ADD_CARRY || // r_r carry stuff is deprecated operation == PPCREC_IML_OP_ADD_CARRY_ME || operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY) { @@ -68,7 +67,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // operand register is read only registersUsed->readNamedReg1 = op_r_immS32.registerIndex; } - else if (operation == PPCREC_IML_OP_ADD || + else if (operation == PPCREC_IML_OP_ADD || // deprecated operation == PPCREC_IML_OP_SUB || operation == PPCREC_IML_OP_AND || operation == PPCREC_IML_OP_OR || diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index a9245baad..54c31ee4d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -16,9 +16,6 @@ enum PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag - // assign operators with cast - PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend - PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend // binary operation PPCREC_IML_OP_OR, // '|' operator PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first @@ -27,8 +24,6 @@ enum PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator PPCREC_IML_OP_LEFT_SHIFT, // shift left operator PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned) - PPCREC_IML_OP_NOT, // complement each bit - PPCREC_IML_OP_NEG, // negate // ppc PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask) PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag) @@ -95,6 +90,13 @@ enum PPCREC_IML_OP_ADD, PPCREC_IML_OP_SUB, + // R_R only + PPCREC_IML_OP_NOT, 
+ PPCREC_IML_OP_NEG, + PPCREC_IML_OP_ASSIGN_S16_TO_S32, + PPCREC_IML_OP_ASSIGN_S8_TO_S32, + + }; #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 0a5bd8abd..a4dd41016 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -1,6 +1,3 @@ - -#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example) - bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker); IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); @@ -30,8 +27,6 @@ void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode); void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); -void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0); - // IML instruction generation (new style, can generate new instructions but also overwrite existing ones) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index c5ec96c88..420bf194a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -69,16 +69,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcIml ppcImlGenContext->emitInst().make_r_s32(operation, registerIndex, immS32, crRegister, crMode); } -void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) -{ - ppcImlGenContext->emitInst().make_r_r_r(operation, registerResult, registerA, registerB, crRegister, crMode); -} - -void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) -{ - ppcImlGenContext->emitInst().make_r_r_s32(operation, registerResult, registerA, immS32, crRegister, crMode); -} - void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name) { // Store name (e.g. 
"'r3' = t0" which translates to MOV [ESP+offset_r3], reg32) @@ -314,6 +304,85 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe return registerIndex; } +bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) +{ + for (sint32 i = 0; i < 6; i++) + { + uint32 opcode = memory_readU32(functionPtr + i * 4); + switch ((opcode >> 26)) + { + case 14: // ADDI + case 15: // ADDIS + continue; + case 19: // opcode category 19 + switch (PPC_getBits(opcode, 30, 10)) + { + case 16: + if (opcode == 0x4E800020) + { + *functionInstructionCount = i; + return true; // BLR + } + return false; + } + return false; + case 32: // LWZ + case 33: // LWZU + case 34: // LBZ + case 35: // LBZU + case 36: // STW + case 37: // STWU + case 38: // STB + case 39: // STBU + case 40: // LHZ + case 41: // LHZU + case 42: // LHA + case 43: // LHAU + case 44: // STH + case 45: // STHU + case 46: // LMW + case 47: // STMW + case 48: // LFS + case 49: // LFSU + case 50: // LFD + case 51: // LFDU + case 52: // STFS + case 53: // STFSU + case 54: // STFD + case 55: // STFDU + continue; + default: + return false; + } + } + return false; +} + +void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uint32 startAddress, sint32 instructionCount) +{ + for (sint32 i = 0; i < instructionCount; i++) + { + ppcImlGenContext->ppcAddressOfCurrentInstruction = startAddress + i * 4; + ppcImlGenContext->cyclesSinceLastBranch++; + if (PPCRecompiler_decodePPCInstruction(ppcImlGenContext)) + { + cemu_assert_suspicious(); + } + } + // add range + cemu_assert_unimplemented(); + //ppcRecRange_t recRange; + //recRange.ppcAddress = startAddress; + //recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR + //ppcImlGenContext->functionRef->list_ranges.push_back(recRange); +} + +// for handling RC bit of many instructions +void PPCImlGen_UpdateCR0Logical(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR) +{ + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL); +} + void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { // split before and after to make sure the macro is in an isolated segment that we can make enterable @@ -336,7 +405,7 @@ bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if (gprReg == PPC_REC_INVALID_REGISTER) gprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, sprReg, gprReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); } else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) { @@ -344,7 +413,7 @@ bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if (gprReg == PPC_REC_INVALID_REGISTER) gprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, sprReg, gprReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true; } else @@ -361,13 +430,13 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { 
uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprReg, sprReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); } else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) { uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprReg, sprReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); } else return false; @@ -400,7 +469,7 @@ bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rD, rA, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0, PPC_REC_INVALID_REGISTER, 0); return true; } @@ -410,7 +479,7 @@ bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 crMask; PPC_OPC_TEMPL_XFX(opcode, rS, crMask); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask, PPC_REC_INVALID_REGISTER, 0); return true; } @@ -422,7 +491,7 @@ void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode cr >>= 2; uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_COMPARE_SIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_SIGNED); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_SIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_SIGNED); } void PPCRecompilerImlGen_CMPL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -433,7 +502,7 @@ void PPCRecompilerImlGen_CMPL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod cr >>= 2; uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); } void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -446,7 +515,7 @@ void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 b = imm; // load gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - 
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED); } void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -459,80 +528,7 @@ void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 b = imm; // load gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); -} - -bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) -{ - for (sint32 i = 0; i < 6; i++) - { - uint32 opcode = memory_readU32(functionPtr+i*4); - switch ((opcode >> 26)) - { - case 14: // ADDI - case 15: // ADDIS - continue; - case 19: // opcode category 19 - switch (PPC_getBits(opcode, 30, 10)) - { - case 16: - if (opcode == 0x4E800020) - { - *functionInstructionCount = i; - return true; // BLR - } - return false; - } - return false; - case 32: // LWZ - case 33: // LWZU - case 34: // LBZ - case 35: // LBZU - case 36: // STW - case 37: // STWU - case 38: // STB - case 39: // STBU - case 40: // LHZ - case 41: // LHZU - case 42: // LHA - case 43: // LHAU - case 44: // STH - case 45: // STHU - case 46: // LMW - case 47: // STMW - case 48: // LFS - case 49: // LFSU - case 50: // LFD - case 51: // LFDU - case 52: // STFS - case 53: // STFSU - case 54: // STFD - case 55: // STFDU - continue; - default: - return false; - } - } - return false; -} - -void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uint32 startAddress, sint32 instructionCount) -{ - for (sint32 i = 0; i < instructionCount; i++) - { - ppcImlGenContext->ppcAddressOfCurrentInstruction = startAddress + i*4; - ppcImlGenContext->cyclesSinceLastBranch++; - if (PPCRecompiler_decodePPCInstruction(ppcImlGenContext)) - { - cemu_assert_suspicious(); - } - } - // add range - cemu_assert_unimplemented(); - //ppcRecRange_t recRange; - //recRange.ppcAddress = startAddress; - //recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR - //ppcImlGenContext->functionRef->list_ranges.push_back(recRange); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); } bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -697,11 +693,11 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // if the branch target is LR, then preserve it in a temporary cemu_assert_suspicious(); // this case needs testing uint32 tmpRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, nullptr, PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg); branchDestReg = tmpRegister; } uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, PPC_REC_INVALID_REGISTER, 0); + 
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, PPC_REC_INVALID_REGISTER, 0); } if (!BO.decrementerIgnore()) @@ -782,11 +778,11 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); if( opcode&PPC_OPC_RC ) { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB); } return true; } @@ -800,9 +796,9 @@ bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB); return true; } @@ -815,9 +811,9 @@ bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); return true; } @@ -835,15 +831,15 @@ bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // move rA to rD if( registerRA != registerRD ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRD, registerRA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); } if( opcode&PPC_OPC_RC ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD); + 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD); } return true; } @@ -862,15 +858,15 @@ bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // move rA to rD if( registerRA != registerRD ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRD, registerRA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); } if( opcode&PPC_OPC_RC ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD); } return true; } @@ -886,14 +882,14 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // check if rD is already loaded, else use new temporary register uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, imm); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, imm); } else { // rA not used, instruction is value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; @@ -909,14 +905,14 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // check if rD is already loaded, else use new temporary register uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm); } else { // rA not used, instruction turns into simple value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; @@ -931,7 +927,7 @@ bool PPCRecompilerImlGen_ADDIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // check if rD is already loaded, else use new temporary register uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - 
PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); // never updates any cr return true; } @@ -946,7 +942,7 @@ bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opc uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // check if rD is already loaded, else use new temporary register uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm, 0, PPCREC_CR_MODE_LOGICAL); return true; } @@ -960,9 +956,9 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA); return true; } @@ -975,9 +971,9 @@ bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); return true; } @@ -990,9 +986,9 @@ bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opc uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, 
registerRD, registerRA); return true; } @@ -1005,9 +1001,9 @@ bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUBFC, registerRD, registerRA, registerRB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUBFC, registerRD, registerRA, registerRB); if (opcode & PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -1021,7 +1017,7 @@ bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc // cr0 is never affected uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUBFC, registerRD, registerRA, imm); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUBFC, registerRD, registerRA, imm); return true; } @@ -1033,7 +1029,7 @@ bool PPCRecompilerImlGen_MULLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // mulli instruction does not modify any flags uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); uint32 registerOperand = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand, (sint32)imm); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand, (sint32)imm); return true; } @@ -1050,9 +1046,9 @@ bool PPCRecompilerImlGen_MULLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2); return true; } @@ -1065,9 +1061,9 @@ bool PPCRecompilerImlGen_MULHW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2, 0, 
PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2); return true; } @@ -1080,9 +1076,9 @@ bool PPCRecompilerImlGen_MULHWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2); return true; } @@ -1096,11 +1092,11 @@ bool PPCRecompilerImlGen_DIVW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); if (opcode & PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2); } return true; } @@ -1115,11 +1111,11 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); if (opcode & PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2); } return true; } @@ -1144,34 +1140,29 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { // SLWI if(opcode&PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH, 0, PPCREC_CR_MODE_LOGICAL); else - 
PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH); return true; } else if( SH == (32-MB) && ME == 31 ) { // SRWI if(opcode&PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB); return true; } // general handler if( registerRA != registerRS ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRA, registerRS); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRA, registerRS); if( SH != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, PPC_REC_INVALID_REGISTER, 0); - if(opcode&PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0); - } + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH); + if( mask != 0xFFFFFFFF ) + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1184,9 +1175,9 @@ bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opc uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // pack RLWIMI parameters into single integer uint32 vImm = MB|(ME<<8)|(SH<<16); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm, PPC_REC_INVALID_REGISTER, 0); if (opcode & PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, registerRA, registerRA, 0, PPCREC_CR_MODE_LOGICAL); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1202,16 +1193,11 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); + if( mask != 0xFFFFFFFF ) + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); if (opcode & PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, 
PPCREC_CR_MODE_LOGICAL); - } - else - { - if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0); - } + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1219,36 +1205,13 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - //uint32 SH = hCPU->gpr[rB] & 0x3f; - //hCPU->gpr[rA] = hCPU->gpr[rS]; - //hCPU->xer_ca = 0; - //if (hCPU->gpr[rA] & 0x80000000) { - // uint32 ca = 0; - // for (uint32 i=0; i < SH; i++) { - // if (hCPU->gpr[rA] & 1) ca = 1; - // hCPU->gpr[rA] >>= 1; - // hCPU->gpr[rA] |= 0x80000000; - // } - // if (ca) hCPU->xer_ca = 1; - //} else { - // if (SH > 31) { - // hCPU->gpr[rA] = 0; - // } else { - // hCPU->gpr[rA] >>= SH; - // } - //} - //if (Opcode & PPC_OPC_RC) { - // // update cr0 flags - // ppc_update_cr0(hCPU, hCPU->gpr[rA]); - //} - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( (opcode&PPC_OPC_RC) != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB); return true; } @@ -1261,9 +1224,9 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH, 0, PPCREC_CR_MODE_LOGICAL); else - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH); return true; } @@ -1277,11 +1240,11 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if (opcode & PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); } return true; } @@ -1296,16 +1259,15 @@ bool 
PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if (opcode & PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); } return true; } - bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int rS, rA, rB; @@ -1313,14 +1275,9 @@ bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_ASSERT(rB==0); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if ( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1330,14 +1287,9 @@ bool PPCRecompilerImlGen_EXTSB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_X(opcode, rS, rA, rB); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if ( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1346,29 +1298,11 @@ bool PPCRecompilerImlGen_CNTLZW(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); PPC_ASSERT(rB==0); - if( opcode&PPC_OPC_RC ) - { - return false; - } uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_CNTLZW, registerRA, registerRS); - - //uint32 n=0; - //uint32 x=0x80000000; - //uint32 v=hCPU->gpr[rS]; - //while (!(v & x)) { - // n++; - // if (n==32) break; - // x>>=1; - //} - //hCPU->gpr[rA] = n; - //if (Opcode & PPC_OPC_RC) { - // // update cr0 flags - // ppc_update_cr0(hCPU, hCPU->gpr[rA]); - //} - - + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, registerRA, registerRS); + if 
((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1377,21 +1311,12 @@ bool PPCRecompilerImlGen_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); PPC_ASSERT(rB == 0); - //hCPU->gpr[rD] = -((signed int)hCPU->gpr[rA]); - //if (Opcode & PPC_OPC_RC) { - // // update cr0 flags - // ppc_update_cr0(hCPU, hCPU->gpr[rD]); - //} + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NEG, registerRD, registerRA, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NEG, registerRD, registerRA); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, registerRD, registerRA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -1430,7 +1355,7 @@ void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1474,7 +1399,7 @@ void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1519,7 +1444,7 @@ void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1563,7 +1488,7 @@ void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, 
false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1609,7 +1534,7 @@ bool PPCRecompilerImlGen_LWZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load word PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 32, false, true); return true; @@ -1676,7 +1601,7 @@ bool PPCRecompilerImlGen_LHAUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load half word PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 16, true, true); return true; @@ -1722,7 +1647,7 @@ bool PPCRecompilerImlGen_LHZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load hald word PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 16, false, true); return true; @@ -1786,7 +1711,7 @@ bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if (destinationRegister == PPC_REC_INVALID_REGISTER) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load byte PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 8, false, true); return true; @@ -1872,12 +1797,12 @@ void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if 
possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); } void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1916,12 +1841,12 @@ void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); } void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1960,12 +1885,12 @@ void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // store byte PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // generic indexed store (STWX, STHX, STBX, STWUX. 
If bitReversed == true -> STHBRX) @@ -2007,7 +1932,7 @@ bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenConte // store word PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, true); // update EA after store - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); return true; } // prepare registers @@ -2015,7 +1940,7 @@ bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenConte uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 sourceRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // update EA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegisterA, 0, storeBitWidth, true); return true; @@ -2159,9 +2084,9 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); // store if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB); else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_DCBZ, gprRegisterB, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterB, gprRegisterB); return true; } @@ -2177,27 +2102,12 @@ bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); } - else + if ((opcode & PPC_OPC_RC)) { - if( opcode&PPC_OPC_RC ) - { - // no effect but CR is updated - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprSourceReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - // no-op - } + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } } else @@ -2210,123 +2120,93 @@ bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { // make sure we don't overwrite rS or rA if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - } + 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); - } - if( opcode&PPC_OPC_RC ) - { - // fixme: merge CR update into OR instruction above - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); } else { // rA = rS if( gprDestReg != gprSource1Reg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); // rA |= rB - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); } + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } return true; } -bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // hCPU->gpr[rA] = hCPU->gpr[rS] | ~hCPU->gpr[rB]; - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg); - return true; -} - bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); //hCPU->gpr[rA] = ~(hCPU->gpr[rS] | hCPU->gpr[rB]); // check for NOT mnemonic - if( rS == rB ) + if (rS == rB) { // simple register copy with NOT - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - } + sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + if (gprDestReg != gprSourceReg) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + 
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // rA = rS | rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) + sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + if (gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg) { // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) + if (gprSource1Reg == gprDestReg) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); - } - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if( opcode&PPC_OPC_RC ) - { - // fixme: merge CR update into OR instruction above - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // rA = rS - if( gprDestReg != gprSource1Reg ) + if (gprDestReg != gprSource1Reg) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); } // rA |= rB - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } } return true; } +bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rS, rA, rB; + PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + // hCPU->gpr[rA] = hCPU->gpr[rS] | ~hCPU->gpr[rB]; + sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg); + if ((opcode & PPC_OPC_RC)) + 
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + return true; +} + bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rS, rA, rB; @@ -2339,14 +2219,9 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { @@ -2364,33 +2239,26 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode // make sure we don't overwrite rS or rA if( gprSource1Reg == gprDestReg ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); } else { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource1Reg); - } - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource1Reg); } + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // rA = rS if( gprDestReg != gprSource1Reg ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); } // rA &= rB - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } } return true; @@ -2406,57 +2274,42 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { // result is always 0 -> replace with XOR rA,rA sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else if( rA == rB ) { // rB already in rA, therefore we complement rA first and then AND it with rS sint32 gprRS = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = ~rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); // rA &= rS - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprRA, gprRS, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprRA, gprRS); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprRS); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // a & (~b) is the same as ~((~a) | b) - sint32 gprRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); sint32 gprRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); sint32 gprRS = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // move rS to rA (if required) - if( gprRA != gprRS ) + if( gprDestReg != gprRS ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprRA, gprRS); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprRS); } // rS already in rA, therefore we complement rS first and then OR it with rB // rA = ~rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); // rA |= rB - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprRA, gprRB); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprRB); // rA = ~rA - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } return true; } @@ -2466,14 +2319,15 @@ void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - // ANDI. always sets cr0 flags sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, gprDestReg, (sint32)imm); + // ANDI. 
always sets cr0 + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2481,14 +2335,15 @@ void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - // ANDI. always sets cr0 flags sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, gprDestReg, (sint32)imm); + // ANDIS. always sets cr0 + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2499,14 +2354,9 @@ bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { // xor register with itself sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { @@ -2518,34 +2368,21 @@ bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { // make sure we don't overwrite rS or rA if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - } - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // rA = rS if( gprDestReg != gprSource1Reg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); // rA ^= rB - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); + if ((opcode & 
PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } } return true; @@ -2560,11 +2397,10 @@ bool PPCRecompilerImlGen_EQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { // xor register with itself, then invert sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { @@ -2576,28 +2412,23 @@ bool PPCRecompilerImlGen_EQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { // make sure we don't overwrite rS or rA if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); } else { // rA = rS if( gprDestReg != gprSource1Reg ) { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); } // rA ^= rB - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); } - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } return true; } @@ -2613,9 +2444,9 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); } void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2629,9 +2460,9 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 gprDestReg = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); } void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2645,9 +2476,9 @@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); } void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2661,9 +2492,9 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index da53ea556..d12783c25 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -163,7 +163,7 @@ bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) @@ -215,7 +215,7 @@ bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); // add rB to rA (if rA != 0) - 
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegister1, gprRegister2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) @@ -258,7 +258,7 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // emit load iml @@ -297,7 +297,7 @@ bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); // add rB to rA (if rA != 0) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegister1, gprRegister2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); @@ -326,7 +326,7 @@ bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); @@ -375,7 +375,7 @@ bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); // calculate EA in rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegister1, gprRegister2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); return true; @@ -412,7 +412,7 @@ bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 
PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); @@ -1114,7 +1114,7 @@ bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired load @@ -1165,7 +1165,7 @@ bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 op // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired store From 8df0281baa931fe98692ab0cfb84b23cc0a950b4 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 19 Dec 2022 01:29:50 +0100 Subject: [PATCH 24/64] PPCRec: Further unify CR code --- .../Recompiler/BackendX64/BackendX64.cpp | 177 ++-------------- .../Espresso/Recompiler/IML/IMLInstruction.h | 13 +- .../Recompiler/PPCRecompilerImlGen.cpp | 197 ++++++------------ .../Recompiler/PPCRecompilerImlGenFPU.cpp | 2 +- 4 files changed, 91 insertions(+), 298 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 461531f98..3271ef4b2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -582,7 +582,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, // bswap EAX x64Gen_bswap_reg64Lower32bit(x64GenContext, X86_REG_EAX); - //x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, 0, REG_RESV_TEMP); x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); @@ -761,6 +760,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // copy operand to result if different registers if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) @@ -773,19 +773,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp x64Gen_adc_reg64Low32_imm32(x64GenContext, 
imlInstruction->op_r_r.registerResult, 0); // update xer carry x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by AND/OR - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Use different version of PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction) - // todo: Also set summary overflow if xer bit is set - } } else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_ME ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // copy operand to result if different registers if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) @@ -798,16 +789,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1); // update xer carry x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // registerResult = ~registerOperand1 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r.registerResult; @@ -822,23 +807,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp x64Gen_adc_reg64Low32_imm32(x64GenContext, rRegResult, 0); // update carry x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // update cr if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL ) - { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - assert_dbg(); - } - } } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { @@ -878,23 +850,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else if( 
imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= immS32 + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - // todo: Set CR SO if XER SO bit is set - } } else if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= immS32 + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); @@ -902,12 +865,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { // registerResult ^= immS32 + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // registerResult <<<= immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); @@ -1076,6 +1041,8 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); + if( imlInstruction->operation == PPCREC_IML_OP_ADD || imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) { // registerResult = registerOperand1 + registerOperand2 @@ -1127,17 +1094,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - sint32 crRegister = imlInstruction->crRegister; - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - return true; - } } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { @@ -1171,20 +1127,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // sub operand2 x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } - 
// set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - sint32 crRegister = imlInstruction->crRegister; - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - return true; - } } else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // registerResult = registerOperand1 - registerOperand2 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; @@ -1228,14 +1174,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } // update carry flag (todo: is this actually correct in all cases?) x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // update cr0 if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - assert_dbg(); - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { @@ -1259,17 +1197,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // add operand2 x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - // since IMUL instruction leaves relevant flags undefined, we have to use another TEST instruction to get the correct results - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) { @@ -1387,16 +1314,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { @@ -1433,16 +1350,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_SRAW ) { @@ -1519,6 +1426,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || 
imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; @@ -1551,19 +1459,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); if( rRegResult != X86_REG_RDX ) x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); - // set cr bits if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_ARITHMETIC ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; @@ -1598,16 +1497,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); if( rRegResult != X86_REG_RDX ) x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); - // set cr bits if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_ORC ) { @@ -1622,17 +1511,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, if( rRegResult != rRegOperand1 ) x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP); - - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - return true; - } } else { @@ -1681,6 +1559,8 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); + sint32 regResult = imlInstruction->op_r_r_s32.registerResult; sint32 regOperand = imlInstruction->op_r_r_s32.registerA; uint32 immS32 = imlInstruction->op_r_r_s32.immS32; @@ -1706,6 +1586,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* 
PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY ) { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // registerResult = registerOperand + immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; @@ -1719,16 +1600,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); // update carry flag x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - sint32 crRegister = imlInstruction->crRegister; - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) { @@ -1836,15 +1707,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // jump destination PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); - // CR update - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); - } } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT ) @@ -1858,13 +1720,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); else x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); - // CR update - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - // since SHL/SHR only modifies the OF flag we need another TEST reg,reg here - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 54c31ee4d..35db10a1c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -4,7 +4,6 @@ enum { PPCREC_IML_OP_ASSIGN, // '=' operator PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap - PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates 
carry bit PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr) PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr) PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply) @@ -12,10 +11,7 @@ enum PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide) PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide) - PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit - PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit - PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag - PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag + // binary operation PPCREC_IML_OP_OR, // '|' operator PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first @@ -96,6 +92,12 @@ enum PPCREC_IML_OP_ASSIGN_S16_TO_S32, PPCREC_IML_OP_ASSIGN_S8_TO_S32, + // deprecated + PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit + PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit + PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit + PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag + PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag }; #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) @@ -147,7 +149,6 @@ enum PPCREC_CR_MODE_COMPARE_SIGNED, PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare - PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code) PPCREC_CR_MODE_LOGICAL, }; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 420bf194a..024b6b860 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -776,14 +776,9 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB); - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -795,10 +790,9 @@ bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - 
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -810,10 +804,9 @@ bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -833,14 +826,9 @@ bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); } - if( opcode&PPC_OPC_RC ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -860,14 +848,9 @@ bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); } - if( opcode&PPC_OPC_RC ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD); - } + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -889,7 +872,7 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // rA not used, instruction is value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, imm); } // never updates any cr return true; @@ -925,7 +908,6 @@ bool PPCRecompilerImlGen_ADDIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); // rD = rA + imm; uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if rD is already loaded, else use new temporary register uint32 registerRD = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); // never updates any cr @@ -940,9 +922,9 @@ bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); // rD = rA + imm; uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if rD is already loaded, else use new temporary register uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -955,10 +937,9 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -970,10 +951,9 @@ bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -985,10 +965,9 @@ bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opc debugBreakpoint(); uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); return true; } @@ -1045,10 +1024,9 @@ bool 
PPCRecompilerImlGen_MULLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { return false; } - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); return true; } @@ -1060,10 +1038,9 @@ bool PPCRecompilerImlGen_MULHW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); return true; } @@ -1075,10 +1052,9 @@ bool PPCRecompilerImlGen_MULHWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); return true; } @@ -1090,14 +1066,9 @@ bool PPCRecompilerImlGen_DIVW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2); if (opcode & PPC_OPC_RC) - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2); - } + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); return true; } @@ -1109,14 +1080,9 
@@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2); if (opcode & PPC_OPC_RC) - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2); - } + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); return true; } @@ -1125,42 +1091,29 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc int rS, rA, SH, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); uint32 mask = ppc_mask(MB, ME); - //uint32 v = ppc_word_rotl(hCPU->gpr[rS], SH); - //hCPU->gpr[rA] = v & mask; uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // handle special forms of RLWINM - if( SH == 0 && SH == (ME-SH) && MB == 0 ) - { - // CLRRWI - // todo - } - else if( ME == (31-SH) && MB == 0 ) + if( ME == (31-SH) && MB == 0 ) { // SLWI - if(opcode&PPC_OPC_RC) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH); - return true; + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH); } else if( SH == (32-MB) && ME == 31 ) { // SRWI - if(opcode&PPC_OPC_RC) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB); - return true; + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB); + } + else + { + // general handler + if (registerRA != registerRS) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRA, registerRS); + if (SH != 0) + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH); + if (mask != 0xFFFFFFFF) + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); } - // general handler - if( registerRA != registerRS ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRA, registerRS); - if( SH != 0 ) - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH); - if( mask != 0xFFFFFFFF ) - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); if (opcode & PPC_OPC_RC) PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; @@ -1185,14 +1138,10 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rS, rA, rB, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME); - // uint32 v = ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]); uint32 mask = ppc_mask(MB, ME); - // uint32 v = ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]); 
- // hCPU->gpr[rA] = v & mask; uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); if( mask != 0xFFFFFFFF ) ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); @@ -1208,10 +1157,9 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( (opcode&PPC_OPC_RC) != 0 ) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1223,10 +1171,9 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco cemu_assert_debug(SH < 32); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH, 0, PPCREC_CR_MODE_LOGICAL); - else - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1238,14 +1185,9 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if (opcode & PPC_OPC_RC) - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1257,14 +1199,9 @@ bool PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if (opcode & PPC_OPC_RC) - { - 
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; } @@ -1890,10 +1827,10 @@ void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); } -// generic indexed store (STWX, STHX, STBX, STWUX. If bitReversed == true -> STHBRX) +// generic indexed store (STWX, STHX, STBX, STWUX. If byteReversed == true -> STHBRX) bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 storeBitWidth, bool byteReversed = false) { sint32 rA, rS, rB; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index d12783c25..c7f91ac26 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -258,7 +258,7 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // emit load iml From 37256ac58935a7e07bff99d80dc0733002560d70 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 27 Dec 2022 05:20:47 +0100 Subject: [PATCH 25/64] PPCRec: Rework carry bit and generalize carry IML instructions Carry bit is now resident in a register-allocated GPR instead of being backed directly into IML instructions All the PowerPC carry ADD* and SUB* instructions as well as SRAW/SRAWI have been reworked to use more generalized IML instructions for handling carry IML instructions now support two named output registers instead of only one (easily extendable to arbitrary count) --- src/Cafe/HW/Espresso/PPCState.h | 3 +- .../Recompiler/BackendX64/BackendX64.cpp | 590 ++- .../Recompiler/BackendX64/BackendX64.h | 46 +- .../Recompiler/BackendX64/BackendX64BMI.cpp | 37 + .../Recompiler/BackendX64/BackendX64FPU.cpp | 4 +- .../Recompiler/BackendX64/x86Emitter.h | 3388 +++++++++++++++-- .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 24 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 39 +- .../Recompiler/IML/IMLInstruction.cpp | 163 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 137 +- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 402 +- 
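(Illustrative aside, not taken from the patch itself: a minimal C++ sketch of the semantics behind the carry rework described in the commit message above. The carry now lives as a plain 0/1 value in its own register-allocated GPR, and carry-producing adds name two outputs, the result register and the carry register. The helper names below are invented for illustration only; the x86 backend in this patch realizes the same semantics with BT to move the carry GPR into CF, ADD/ADC, and SETcc with condition B to write CF back into the carry GPR.)

    #include <cstdint>

    // result = a + b; carryOut receives 0/1
    // (corresponds to IML ADD with a named carry output register)
    static inline uint32_t imlAddCarryOut(uint32_t a, uint32_t b, uint32_t& carryOut)
    {
        uint32_t r = a + b;
        carryOut = (r < a) ? 1 : 0;   // x86 lowering: SETcc(B) after ADD
        return r;
    }

    // result = a + b + carry; carry updated in place
    // (corresponds to IML PPCREC_IML_OP_ADD_WITH_CARRY)
    static inline uint32_t imlAddWithCarry(uint32_t a, uint32_t b, uint32_t& carryInOut)
    {
        uint64_t r = (uint64_t)a + b + (carryInOut & 1);  // x86 lowering: BT carryReg,0 then ADC
        carryInOut = (uint32_t)(r >> 32);                 // x86 lowering: SETcc(B) back into carryReg
        return (uint32_t)r;
    }
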
.../Recompiler/IML/IMLRegisterAllocator.cpp | 80 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 15 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 8 + .../Recompiler/PPCRecompilerImlGen.cpp | 368 +- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 2 +- 16 files changed, 4121 insertions(+), 1185 deletions(-) diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h index c315ed0e2..ea7edfa29 100644 --- a/src/Cafe/HW/Espresso/PPCState.h +++ b/src/Cafe/HW/Espresso/PPCState.h @@ -67,7 +67,8 @@ struct PPCInterpreter_t uint32 reservedMemValue; // temporary storage for recompiler FPR_t temporaryFPR[8]; - uint32 temporaryGPR[4]; + uint32 temporaryGPR[4]; // deprecated, refactor away backend dependency on this + uint32 temporaryGPR_reg[4]; // values below this are not used by Cafe OS usermode struct { diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 3271ef4b2..ed812b46f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -23,6 +23,11 @@ static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) return (x86Assembler64::GPR32)regId; } +static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + X86Cond _x86Cond(IMLCondition imlCond) { @@ -32,6 +37,10 @@ X86Cond _x86Cond(IMLCondition imlCond) return X86_CONDITION_Z; case IMLCondition::NEQ: return X86_CONDITION_NZ; + case IMLCondition::UNSIGNED_GT: + return X86_CONDITION_NBE; + case IMLCondition::UNSIGNED_LT: + return X86_CONDITION_B; default: break; } @@ -758,56 +767,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else assert_dbg(); } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // copy operand to result if different registers - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 0); - // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_ME ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // copy operand to result if different registers - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1); - // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if( 
imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult = ~registerOperand1 + carry - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = imlInstruction->op_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r.registerA; - // copy operand to result register - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - // execute NOT on result - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry - x64Gen_adc_reg64Low32_imm32(x64GenContext, rRegResult, 0); - // update carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); @@ -1043,56 +1002,26 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( imlInstruction->operation == PPCREC_IML_OP_ADD || imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) + if( imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - - bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it if( rRegResult == rRegOperand1 ) - { - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); else - { - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); } else { // copy operand1 to destination register before doing addition x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // add operand2 - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // update carry - if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) - { - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } 
} else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) @@ -1128,52 +1057,25 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) + else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult = registerOperand1 - registerOperand2 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - if( rRegOperand1 == rRegOperand2 ) - { - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // result = operand1 - operand1 -> 0 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - } - else if( rRegResult == rRegOperand1 ) - { - // copy inverted xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // result = result - operand2 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else if ( rRegResult == rRegOperand2 ) - { - // result = operand1 - result - // NOT result - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ADC result, operand1 - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } + sint32 rRegA = imlInstruction->op_r_r_r.registerA; + sint32 rRegB = imlInstruction->op_r_r_r.registerB; + if (rRegResult == rRegB) + std::swap(rRegA, rRegB); + + if (rRegResult != rRegA) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegA); + + if (imlInstruction->operation == PPCREC_IML_OP_OR) + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); + else if (imlInstruction->operation == PPCREC_IML_OP_AND) + x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); else - { - // copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // sub operand2 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // update carry flag (todo: is this actually correct in all cases?) 
- x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { @@ -1198,79 +1100,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } } - else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) - { - // registerResult = registerOperand2(rB) - registerOperand1(rA) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // updates carry flag - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - return false; - } - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperandA = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperandB = imlInstruction->op_r_r_r.registerB; - // update carry flag - // carry flag is detected this way: - //if ((~a+b) < a) { - // return true; - //} - //if ((~a+b+1) < 1) { - // return true; - //} - // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ((~a+b)<~a) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); - x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandA); - x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - // OR ((~a+b+1)<1) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); - // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - // do subtraction - if( rRegOperandB == rRegOperandA ) - { - // result = operandA - operandA -> 0 - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - } - else if( rRegResult == rRegOperandB ) - { - // result = result - operandA - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA); - } - else if ( rRegResult == rRegOperandA ) - { - // result = operandB - result - // NEG result - x64Gen_neg_reg64Low32(x64GenContext, rRegResult); - // ADD result, operandB - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandB); - } - else - { - 
// copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperandB); - // sub operand2 - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA); - } - } else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW ) { // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) @@ -1351,78 +1180,88 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } } - else if( imlInstruction->operation == PPCREC_IML_OP_SRAW ) + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || + imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) { - // registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + + // x86's shift and rotate instruction have the shift amount hardwired to the CL register + // since our register allocator doesn't support instruction based fixed phys registers yet + // we'll instead have to temporarily shuffle registers around + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - // save cr - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) + + // we use BMI2's shift instructions until the RA can assign fixed registers + if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { - return false; + x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - // todo: Use BMI instructions if available? 
- // MOV registerResult, registerOperand (if different) - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); - // reset carry - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set - x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000); - sint32 jumpInstructionJumpToSignedShift = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - // unsigned shift (MSB of input register is not set) - for(sint32 b=0; b<6; b++) + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) { - x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - if( b == 5 ) - { - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<emitter->GetWriteIndex()); + x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - sint32 jumpInstructionJumpToEnd = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NONE, 0); - // signed shift - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->emitter->GetWriteIndex()); - for(sint32 b=0; b<6; b++) + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) { - // check if we need to shift by (1<emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - // set ca if any non-zero bit is shifted out - x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->emitter->GetWriteIndex()); - // arithmetic shift - if( b == 5 ) - { - // copy sign bit into all bits - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<emitter->GetWriteIndex()); + x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->emitter->GetWriteIndex()); - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); - // update CR if requested - // todo + + //auto rResult = _reg32(rRegResult); + //auto rOp2 = _reg8_from_reg32(_reg32(rRegOperand2)); + + //if (rRegResult == rRegOperand2) + //{ + // if (rRegResult != rRegOperand1) + // __debugbreak(); // cannot handle yet (we use rRegResult as a temporary reg, but its not possible if it is shared with op2) + //} + + //if(rRegOperand1 != rRegResult) + // x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + + //cemu_assert_debug(rRegOperand1 != X86_REG_ECX); + + //if (rRegOperand2 == X86_REG_ECX) + //{ + // if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + // x64GenContext->emitter->SAR_d_CL(rResult); + // else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + // x64GenContext->emitter->SHR_d_CL(rResult); + // else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + // x64GenContext->emitter->SHL_d_CL(rResult); + // else + // cemu_assert_unimplemented(); + //} + //else + //{ + 
// auto rRegResultOrg = rRegResult; + // if (rRegResult == X86_REG_ECX) + // { + // x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegResult); + // rRegResult = REG_RESV_TEMP; + // rResult = _reg32(rRegResult); + // } + // + // x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); + // + // if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + // x64GenContext->emitter->SAR_d_CL(rResult); + // else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + // x64GenContext->emitter->SHR_d_CL(rResult); + // else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + // x64GenContext->emitter->SHL_d_CL(rResult); + // else + // cemu_assert_unimplemented(); + + // x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); + + // // move result back if it was in ECX + // if (rRegResultOrg == X86_REG_ECX) + // { + // x64Gen_mov_reg64_reg64(x64GenContext, rRegResultOrg, REG_RESV_TEMP); + // } + //} } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { @@ -1520,6 +1359,44 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, return true; } +bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + + auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR); + auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA); + auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB); + auto regCarry = _reg32(imlInstruction->op_r_r_r_carry.regCarry); + cemu_assert_debug(regCarry != regR && regCarry != regA); + + switch (imlInstruction->operation) + { + case PPCREC_IML_OP_ADD: + if (regB == regR) + std::swap(regB, regA); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->XOR_dd(regCarry, regCarry); + x64GenContext->emitter->ADD_dd(regR, regB); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); // below condition checks carry flag + break; + case PPCREC_IML_OP_ADD_WITH_CARRY: + // assumes that carry is already correctly initialized as 0 or 1 + if (regB == regR) + std::swap(regB, regA); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag + x64GenContext->emitter->ADC_dd(regR, regB); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + default: + cemu_assert_unimplemented(); + return false; + } + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); @@ -1557,6 +1434,14 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, return true; } +bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + 
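	// JMP emitted with a zero rel32 placeholder; the offset recorded above is relocated to nextSegmentBranchTaken once all segments have been emitted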
x64GenContext->emitter->JMP_j32(0); + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); @@ -1584,65 +1469,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY ) + else if (imlInstruction->operation == PPCREC_IML_OP_AND || + imlInstruction->operation == PPCREC_IML_OP_OR || + imlInstruction->operation == PPCREC_IML_OP_XOR) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult = registerOperand + immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; - sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; - uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); - // update carry flag - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) - { - // registerResult = immS32 - registerOperand PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; - sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; - sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ((~a+b)<~a) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); - x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperand); - x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - // OR ((~a+b+1)<1) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); - // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); 
- x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - // do actual computation of value, note: a - b is equivalent to a + ~b + 1 - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immS32 + 1); + if (regResult != regOperand) + x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); + if (imlInstruction->operation == PPCREC_IML_OP_AND) + x64Gen_and_reg64Low32_imm32(x64GenContext, regResult, immS32); + else if (imlInstruction->operation == PPCREC_IML_OP_OR) + x64Gen_or_reg64Low32_imm32(x64GenContext, regResult, immS32); + else // XOR + x64Gen_xor_reg64Low32_imm32(x64GenContext, regResult, immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) { @@ -1679,47 +1519,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP); } - else if( imlInstruction->operation == PPCREC_IML_OP_SRAW ) - { - // registerResult = registerOperand>>SH and set xer ca flag - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32; - // MOV registerResult, registerOperand (if different) - if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); - // todo: Detect if we don't need to update carry - // generic case - // TEST registerResult, (1<<(SH+1))-1 - uint32 caTestMask = 0; - if (sh >= 31) - caTestMask = 0x7FFFFFFF; - else - caTestMask = (1 << (sh)) - 1; - x64Gen_test_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, caTestMask); - // SETNE/NZ [ESP+XER_CA] - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // SAR registerResult, SH - x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh); - // JNS (if sign not set) - sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here - // MOV BYTE [ESP+xer_ca], 0 - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // jump destination - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); - } - else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || - imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT ) + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // MOV registerResult, registerOperand (if different) if( imlInstruction->op_r_r_s32.registerA != 
imlInstruction->op_r_r_s32.registerResult ) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); - // Shift - if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ) + + if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); - else + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); + else // RIGHT_SHIFT_S + x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); } else { @@ -1729,6 +1542,40 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction return true; } +bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + + auto regR = _reg32(imlInstruction->op_r_r_s32_carry.regR); + auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA); + sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32; + auto regCarry = _reg32(imlInstruction->op_r_r_s32_carry.regCarry); + cemu_assert_debug(regCarry != regR && regCarry != regA); + + switch (imlInstruction->operation) + { + case PPCREC_IML_OP_ADD: + x64GenContext->emitter->XOR_dd(regCarry, regCarry); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->ADD_di32(regR, immS32); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + case PPCREC_IML_OP_ADD_WITH_CARRY: + // assumes that carry is already correctly initialized as 0 or 1 + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag + x64GenContext->emitter->ADC_di32(regR, immS32); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + default: + cemu_assert_unimplemented(); + return false; + } + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) @@ -1925,7 +1772,11 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } else assert_dbg(); @@ -1957,7 +1808,11 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - 
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); } else assert_dbg(); @@ -2016,37 +1871,37 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) { if( PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { codeGenerationFailed = true; - } } else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) { if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { codeGenerationFailed = true; - } } else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { if (PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { codeGenerationFailed = true; - } } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 ) + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) { - if( PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { + if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) codeGenerationFailed = true; - } } else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) { if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { codeGenerationFailed = true; - } + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; } else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE) { @@ -2063,6 +1918,13 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } + else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP) + { + if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) + { + codeGenerationFailed = true; + } + } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) { if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index eefd9da36..b9cb0585e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -33,43 +33,6 @@ struct x64GenContext_t std::vector relocateOffsetTable2; }; -// todo - these definitions are part of the x86_64 emitter. 
Not the backend itself. We should move them eventually -//#define X86_REG_EAX 0 -//#define X86_REG_ECX 1 -//#define X86_REG_EDX 2 -//#define X86_REG_EBX 3 -//#define X86_REG_ESP 4 // reserved for low half of hCPU pointer -//#define X86_REG_EBP 5 -//#define X86_REG_ESI 6 -//#define X86_REG_EDI 7 -//#define X86_REG_NONE -1 -// -//#define X86_REG_RAX 0 -//#define X86_REG_RCX 1 -//#define X86_REG_RDX 2 -//#define X86_REG_RBX 3 -//#define X86_REG_RSP 4 // reserved for hCPU pointer -//#define X86_REG_RBP 5 -//#define X86_REG_RSI 6 -//#define X86_REG_RDI 7 -//#define X86_REG_R8 8 -//#define X86_REG_R9 9 -//#define X86_REG_R10 10 -//#define X86_REG_R11 11 -//#define X86_REG_R12 12 -//#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet) -//#define X86_REG_R14 14 // reserved as temporary register -//#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData -// -//#define X86_REG_AL 0 -//#define X86_REG_CL 1 -//#define X86_REG_DL 2 -//#define X86_REG_BL 3 -//#define X86_REG_AH 4 -> Adressable via non-REX only -//#define X86_REG_CH 5 -//#define X86_REG_DH 6 -//#define X86_REG_BH 7 - // reserved registers #define REG_RESV_TEMP (X86_REG_R14) #define REG_RESV_HCPU (X86_REG_RSP) @@ -79,8 +42,7 @@ struct x64GenContext_t // reserved floating-point registers #define REG_RESV_FPR_TEMP (15) - -#define reg32ToReg16(__x) (__x) +#define reg32ToReg16(__x) (__x) // deprecated // deprecated condition flags enum @@ -308,4 +270,8 @@ void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64G void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister); void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); -void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); \ No newline at end of file +void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp index c9ffc4649..bbb707e05 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp @@ -68,6 +68,34 @@ void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); } +void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 
0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x7B - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + // SARX reg64, reg64, reg64 + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0xFA - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x7A - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) { // SHLX reg64, reg64, reg64 @@ -76,4 +104,13 @@ void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8); x64Gen_writeU8(x64GenContext, 0xF7); x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 
0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x79 - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index c7e11d42a..e50052d5a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -623,11 +623,11 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - // store double low part + // store double low part x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP); - // store double high part + // store double high part x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h index 4c67797c7..6b05a5146 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h @@ -99,7 +99,13 @@ class x86Assembler64 if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -125,7 +131,13 @@ class x86Assembler64 if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -166,7 +178,13 @@ class x86Assembler64 if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -192,7 +210,13 @@ class x86Assembler64 if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + 
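		// a base register whose low 3 bits are 100b (RSP/R12) cannot be encoded in ModRM alone, since r/m=100b selects SIB addressing;
		// force a SIB byte and re-use the base in the index field (index=100b is the "no index" encoding, e.g. MOV eax,[rsp+0x20] = 8B 44 24 20);
		// an explicit index combined with such a base is not supported here (asserted below)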
bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -216,7 +240,13 @@ class x86Assembler64 if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -242,7 +272,13 @@ class x86Assembler64 if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -260,22 +296,28 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void XOR_bb(GPR8_REX dst, GPR8_REX src) + void OR_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) { _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x30); + _emitU8(0x08); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void XOR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + void OR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -286,7 +328,7 @@ class x86Assembler64 if ((src >= 4) || (memReg & 8)) _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x30); + _emitU8(0x08); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -295,13 +337,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void XOR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void OR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -312,7 +360,7 @@ class x86Assembler64 if ((dst >= 4) || (memReg & 8)) _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x32); + _emitU8(0x0a); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { @@ -321,28 +369,34 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void XOR_dd(GPR32 dst, GPR32 src) + void OR_dd(GPR32 dst, GPR32 src) { if (((src & 8) != 0) || ((dst & 8) != 0)) { _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x31); + _emitU8(0x09); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void XOR_qq(GPR64 dst, GPR64 src) + void OR_qq(GPR64 dst, GPR64 src) { _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); - _emitU8(0x31); + _emitU8(0x09); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void XOR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + void OR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -353,7 +407,7 @@ class x86Assembler64 if ((src & 8) || (memReg & 8)) _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x31); + _emitU8(0x09); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -362,13 +416,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void XOR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + void OR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -377,7 +437,7 @@ class x86Assembler64 { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x31); + _emitU8(0x09); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -386,13 +446,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void XOR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void OR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -403,7 +469,7 @@ class x86Assembler64 if ((dst & 8) || (memReg & 8)) _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x33); + _emitU8(0x0b); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { @@ -412,13 +478,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void XOR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void OR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -427,7 +499,7 @@ class x86Assembler64 { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x33); + _emitU8(0x0b); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -436,22 +508,28 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void CMP_bb(GPR8_REX dst, GPR8_REX src) + void ADC_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) { _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x38); + _emitU8(0x10); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void CMP_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + void ADC_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -462,7 +540,7 @@ class x86Assembler64 if ((src >= 4) || (memReg & 8)) _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x38); + _emitU8(0x10); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -471,13 +549,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void CMP_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void ADC_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -488,7 +572,7 @@ class x86Assembler64 if ((dst >= 4) || (memReg & 8)) _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x3a); + _emitU8(0x12); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { @@ -497,28 +581,34 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void CMP_dd(GPR32 dst, GPR32 src) + void ADC_dd(GPR32 dst, GPR32 src) { if (((src & 8) != 0) || ((dst & 8) != 0)) { _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x39); + _emitU8(0x11); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void CMP_qq(GPR64 dst, GPR64 src) + void ADC_qq(GPR64 dst, GPR64 src) { _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); - _emitU8(0x39); + _emitU8(0x11); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void CMP_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + void ADC_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -529,7 +619,7 @@ class x86Assembler64 if ((src & 8) || (memReg & 8)) _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x39); + _emitU8(0x11); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -538,13 +628,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void CMP_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + void ADC_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -553,7 +649,7 @@ class x86Assembler64 { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x39); + _emitU8(0x11); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -562,13 +658,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void CMP_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void ADC_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -579,7 +681,7 @@ class x86Assembler64 if ((dst & 8) || (memReg & 8)) _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x3b); + _emitU8(0x13); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { @@ -588,13 +690,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void CMP_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void ADC_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -603,7 +711,7 @@ class x86Assembler64 { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x3b); + _emitU8(0x13); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -612,171 +720,148 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void CMP_di32(GPR32 dst, s32 imm) + void SBB_bb(GPR8_REX dst, GPR8_REX src) { - if (((dst & 8) != 0)) + if ((src >= 4) || (dst >= 4)) { - _emitU8(0x40 | ((dst & 8) >> 3)); + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x81); - _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); - _emitU32((u32)imm); - } - void CMP_qi32(GPR64 dst, s32 imm) - { - _emitU8(0x48 | ((dst & 8) >> 3)); - _emitU8(0x81); - _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); - _emitU32((u32)imm); + _emitU8(0x18); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void CMP_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + void SBB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) - _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); } else { - if ((memReg & 8)) - _emitU8(0x40 | ((memReg & 8) >> 1)); + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x81); - _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0x18); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); } if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); - _emitU32((u32)imm); } - void CMP_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + void SBB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); } else { - _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x81); - _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0x1a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); } if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); - _emitU32((u32)imm); } - void CMP_di8(GPR32 dst, s8 imm) + void SBB_dd(GPR32 dst, GPR32 src) { - if (((dst & 8) != 0)) + if (((src & 8) != 0) || ((dst & 8) != 0)) { - _emitU8(0x40 | ((dst & 8) >> 3)); + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x83); - _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); - _emitU8((u8)imm); + _emitU8(0x19); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void CMP_qi8(GPR64 dst, s8 imm) + void SBB_qq(GPR64 dst, GPR64 src) { - _emitU8(0x48 | ((dst & 8) >> 3)); - _emitU8(0x83); - _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); - _emitU8((u8)imm); + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x19); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void CMP_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + void SBB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); - if (sib_use) - { - if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) - _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); - } - else - { - if ((memReg & 8)) - _emitU8(0x40 | ((memReg & 8) >> 1)); - } - _emitU8(0x83); - _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); - if (sib_use) + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) { - _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; } - if (mod == 1) _emitU8((u8)offset); - else if (mod == 2) _emitU32((u32)offset); - _emitU8((u8)imm); - } - void CMP_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) - { - uint8 mod; - if (offset == 0 && (memReg & 7) != 5) mod = 0; - else if (offset == (s32)(s8)offset) mod = 1; - else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); if (sib_use) { - _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); } else { - _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x83); - _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0x19); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); } if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); - _emitU8((u8)imm); - } - void TEST_bb(GPR8_REX dst, GPR8_REX src) - { - if ((src >= 4) || (dst >= 4)) - { - _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); - } - _emitU8(0x84); - _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void TEST_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + void SBB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) - _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); } else { - if ((src >= 4) || (memReg & 8)) - _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x84); + _emitU8(0x19); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { @@ -785,40 +870,31 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void TEST_dd(GPR32 dst, GPR32 src) - { - if (((src & 8) != 0) || ((dst & 8) != 0)) - { - _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); - } - _emitU8(0x85); - _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); - } - void TEST_qq(GPR64 dst, GPR64 src) - { - _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); - _emitU8(0x85); - _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); - } - void TEST_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + void SBB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) - _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); } else { - if ((src & 8) || (memReg & 8)) - _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x85); - _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0x1b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); @@ -826,23 +902,29 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void TEST_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + void SBB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); } else { - _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x85); - _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0x1b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); @@ -850,22 +932,28 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void MOV_bb(GPR8_REX dst, GPR8_REX src) + void AND_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) { _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x88); + _emitU8(0x20); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void MOV_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + void AND_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -876,7 +964,7 @@ class x86Assembler64 if ((src >= 4) || (memReg & 8)) _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x88); + _emitU8(0x20); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -885,13 +973,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void MOV_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void AND_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -902,7 +996,7 @@ class x86Assembler64 if ((dst >= 4) || (memReg & 8)) _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x8a); + _emitU8(0x22); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { @@ -911,28 +1005,34 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void MOV_dd(GPR32 dst, GPR32 src) + void AND_dd(GPR32 dst, GPR32 src) { if (((src & 8) != 0) || ((dst & 8) != 0)) { _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); } - _emitU8(0x89); + _emitU8(0x21); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void MOV_qq(GPR64 dst, GPR64 src) + void AND_qq(GPR64 dst, GPR64 src) { _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); - _emitU8(0x89); + _emitU8(0x21); _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); } - void MOV_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + void AND_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -943,7 +1043,7 @@ class x86Assembler64 if ((src & 8) || (memReg & 8)) _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x89); + _emitU8(0x21); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -952,13 +1052,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void MOV_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + void AND_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -967,7 +1073,7 @@ class x86Assembler64 { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x89); + _emitU8(0x21); _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -976,13 +1082,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void MOV_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void AND_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -993,7 +1105,7 @@ class x86Assembler64 if ((dst & 8) || (memReg & 8)) _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); } - _emitU8(0x8b); + _emitU8(0x23); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { @@ -1002,13 +1114,19 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void MOV_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void AND_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -1017,7 +1135,7 @@ class x86Assembler64 { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x8b); + _emitU8(0x23); _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { @@ -1026,37 +1144,2637 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void MOV_di32(GPR32 dst, s32 imm) + void SUB_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x28); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x28); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x2a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x29); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x29); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x29); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x29); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x2b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x2b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x30); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x30); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x32); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x31); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x31); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x31); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x31); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x33); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x33); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x38); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x38); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x3a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x39); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x39); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x39); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x39); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x3b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x3b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADD_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADD_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADD_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void OR_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void OR_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void OR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void OR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADC_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADC_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADC_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADC_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SBB_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SBB_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SBB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SBB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void AND_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void AND_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void AND_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void AND_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SUB_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SUB_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SUB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SUB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void XOR_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void XOR_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void XOR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void XOR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void CMP_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void CMP_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void CMP_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void CMP_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADD_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADD_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADD_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADD_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void OR_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void OR_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void OR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void OR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADC_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADC_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADC_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADC_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SBB_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SBB_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SBB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SBB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void AND_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void AND_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void AND_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void AND_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SUB_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SUB_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SUB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SUB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void XOR_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void XOR_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void XOR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void XOR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void CMP_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void CMP_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void CMP_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void CMP_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void TEST_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x84); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x84); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void TEST_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x85); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x85); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x85); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void TEST_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x85); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x88); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x88); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x8a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x89); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x89); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x89); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x89); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x8b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x8b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xb8 | ((dst) & 7)); + _emitU32((u32)imm); + } + void MOV_qi64(GPR64 dst, s64 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xb8 | ((dst) & 7)); + _emitU64((u64)imm); + } + void CALL_q(GPR64 dst) { if (((dst & 8) != 0)) { _emitU8(0x40 | ((dst & 8) >> 3)); } - _emitU8(0xb8 | ((dst) & 7)); - _emitU32((u32)imm); + _emitU8(0xff); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + } + void CALL_q_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xff); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void IMUL_ddi32(GPR32 dst, GPR32 src, s32 imm) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x69); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU32((u32)imm); + } + void IMUL_qqi32(GPR64 dst, GPR64 src, s32 imm) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x69); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU32((u32)imm); + } + void IMUL_ddi32_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x69); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void IMUL_qqi32_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x69); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void IMUL_ddi8(GPR32 dst, GPR32 src, s8 imm) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x6b); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU8((u8)imm); + } + void IMUL_qqi8(GPR64 dst, GPR64 src, s8 imm) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x6b); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU8((u8)imm); + } + void IMUL_ddi8_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x6b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void IMUL_qqi8_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x6b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SHL_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHR_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); } - void MOV_qi64(GPR64 dst, s64 imm) + void SHR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { - _emitU8(0x48 | ((dst & 8) >> 3)); - _emitU8(0xb8 | ((dst) & 7)); - _emitU64((u64)imm); + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); } - void CALL_q(GPR64 dst) + void SAR_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + } + void SAR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHL_d_CL(GPR32 dst) { if (((dst & 8) != 0)) { _emitU8(0x40 | ((dst & 8) >> 3)); } - _emitU8(0xff); - _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); } - void CALL_q_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + void SHL_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -1067,8 +3785,8 @@ class x86Assembler64 if ((memReg & 8)) _emitU8(0x40 | ((memReg & 8) >> 1)); } - _emitU8(0xff); - _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0xd3); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); @@ -1076,143 +3794,194 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } - void IMUL_ddi32(GPR32 dst, GPR32 src, s32 imm) + void SHL_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { - if (((dst & 8) != 0) || ((src & 8) != 0)) + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) { - _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; } - _emitU8(0x69); - _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); - _emitU32((u32)imm); + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); } - void IMUL_qqi32(GPR64 dst, GPR64 src, s32 imm) + void SHR_d_CL(GPR32 dst) { - _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); - _emitU8(0x69); - _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); - _emitU32((u32)imm); + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd3); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); } - void IMUL_ddi32_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + void SHR_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + } + void SHR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); } else { - if ((dst & 8) || (memReg & 8)) - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); } - _emitU8(0x69); - _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0xd3); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); } if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); - _emitU32((u32)imm); } - void IMUL_qqi32_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + void SHR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); } else { - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x69); - _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0xd3); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); } if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); - _emitU32((u32)imm); } - void IMUL_ddi8(GPR32 dst, GPR32 src, s8 imm) + void SAR_d_CL(GPR32 dst) { - if (((dst & 8) != 0) || ((src & 8) != 0)) + if (((dst & 8) != 0)) { - _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x40 | ((dst & 8) >> 3)); } - _emitU8(0x6b); - _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); - _emitU8((u8)imm); + _emitU8(0xd3); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); } - void IMUL_qqi8(GPR64 dst, GPR64 src, s8 imm) + void SAR_q_CL(GPR64 dst) { - _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); - _emitU8(0x6b); - _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); - _emitU8((u8)imm); + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); } - void IMUL_ddi8_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + void SAR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); } else { - if ((dst & 8) || (memReg & 8)) - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); } - _emitU8(0x6b); - _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0xd3); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); } if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); - _emitU8((u8)imm); } - void IMUL_qqi8_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + void SAR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) { uint8 mod; if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); } else { - _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); } - _emitU8(0x6b); - _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + _emitU8(0xd3); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); if (sib_use) { _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); } if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); - _emitU8((u8)imm); + } + void JMP_j32(s32 imm) + { + _emitU8(0xe9); + _emitU32((u32)imm); } void Jcc_j32(X86Cond cond, s32 imm) { @@ -1236,7 +4005,13 @@ class x86Assembler64 if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -1257,4 +4032,89 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void BT_du8(GPR32 dst, u8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void BT_qu8(GPR64 dst, u8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void BT_du8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void BT_qu8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } }; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index fae49541c..cd40de7f4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -5,10 +5,12 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" /* - * Initializes a single segment and returns true if it is a finite loop + * Analyzes a single segment and returns true if it is a finite loop */ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) { + return false; // !!! DISABLED !!! + bool isTightFiniteLoop = false; // base criteria, must jump to beginning of same segment if (imlSegment->nextSegmentBranchTaken != imlSegment) @@ -42,9 +44,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB)) continue; instIt.CheckRegisterUsage(®istersUsed); - if(registersUsed.writtenNamedReg1 < 0) - continue; - list_modifiedRegisters.remove(registersUsed.writtenNamedReg1); + registersUsed.ForEachWrittenGPR([&](IMLReg r) { list_modifiedRegisters.remove(r); }); } if (list_modifiedRegisters.count > 0) { @@ -63,10 +63,6 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) return true; - if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) - return true; // ?? - if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) - return true; // ?? 
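Note on the new memory-operand emitter helpers above (the `*_l` variants of TEST/MOV/CALL/IMUL/shift/BT): they all repeat the same x86-64 addressing-mode recipe — the displacement size selects the ModRM mod field (0 = no displacement, 1 = disp8, 2 = disp32), a scaled index register or an RSP/R12 base forces a SIB byte, and the REX bits R/X/B extend the reg, index and base fields to R8-R15. As a reading aid only, here is a rough standalone sketch of that recipe; the function name, byte-buffer output and register numbering are illustrative assumptions, not code from this patch:

	#include <cstdint>
	#include <vector>

	// Hypothetical sketch (not part of the patch): emit REX + opcode + ModRM + SIB +
	// displacement for "op reg, [base + index*1 + disp]". Registers use the usual
	// x86-64 numbering 0-15; index < 0 means "no index register"; scale is kept at 1.
	// (RSP cannot be used as an index register; that case is not handled here.)
	static void EmitOpWithMemOperand(std::vector<uint8_t>& out, uint8_t opcode,
	                                 uint8_t regField, uint8_t base, int index, int32_t disp)
	{
		// mod: 0 = no displacement (not encodable with an RBP/R13 base),
		//      1 = 8-bit displacement, 2 = 32-bit displacement
		uint8_t mod;
		if (disp == 0 && (base & 7) != 5)       mod = 0;
		else if (disp == (int32_t)(int8_t)disp) mod = 1;
		else                                    mod = 2;
		// a SIB byte is mandatory when an index is used or the base is RSP/R12
		bool useSib = (index >= 0) || ((base & 7) == 4);
		// REX prefix: bit R extends regField, bit B the base, bit X the index
		uint8_t rex = 0x40 | ((regField & 8) >> 1) | ((base & 8) >> 3);
		if (index >= 0)
			rex |= (index & 8) >> 2;
		if (rex != 0x40)
			out.push_back(rex);
		out.push_back(opcode);
		// ModRM: rm = 100b redirects the memory operand to the SIB byte
		out.push_back((mod << 6) | ((regField & 7) << 3) | (useSib ? 4 : (base & 7)));
		if (useSib)
		{
			uint8_t idxField = (index >= 0) ? (index & 7) : 4; // index field 100b = no index
			out.push_back((0 << 6) | (idxField << 3) | (base & 7));
		}
		if (mod == 1)
			out.push_back((uint8_t)disp);
		else if (mod == 2)
			for (int i = 0; i < 4; i++)
				out.push_back((uint8_t)((uint32_t)disp >> (8 * i)));
	}

For example, EmitOpWithMemOperand(buf, 0x8B, 0, 3, -1, 0x40) produces the bytes 8B 43 40, i.e. `mov eax, [rbx+0x40]` — the same sequence the MOV_dd_r helper above emits for register 0 (eax), base 3 (rbx), no index and offset 0x40.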
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) @@ -79,6 +75,18 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) return true; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) return true; + + // new instructions + if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + return true; + if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + return true; + if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) + return true; + if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) + return true; + + return false; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 72f706d96..4dafaf18b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -14,10 +14,10 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return "MOV"; else if (op == PPCREC_IML_OP_ADD) return "ADD"; + else if (op == PPCREC_IML_OP_ADD_WITH_CARRY) + return "ADC"; else if (op == PPCREC_IML_OP_SUB) return "SUB"; - else if (op == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY) - return "ADDCSC"; else if (op == PPCREC_IML_OP_OR) return "OR"; else if (op == PPCREC_IML_OP_AND) @@ -26,8 +26,12 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return "XOR"; else if (op == PPCREC_IML_OP_LEFT_SHIFT) return "LSH"; - else if (op == PPCREC_IML_OP_RIGHT_SHIFT) + else if (op == PPCREC_IML_OP_RIGHT_SHIFT_U) return "RSH"; + else if (op == PPCREC_IML_OP_RIGHT_SHIFT_S) + return "ARSH"; + else if (op == PPCREC_IML_OP_LEFT_ROTATE) + return "LROT"; else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED) return "MULS"; else if (op == PPCREC_IML_OP_DIVIDE_SIGNED) @@ -129,6 +133,14 @@ std::string IMLDebug_GetConditionName(IMLCondition cond) return "EQ"; case IMLCondition::NEQ: return "NEQ"; + case IMLCondition::UNSIGNED_GT: + return "UGT"; + case IMLCondition::UNSIGNED_LT: + return "ULT"; + case IMLCondition::SIGNED_GT: + return "SGT"; + case IMLCondition::SIGNED_LT: + return "SLT"; default: cemu_assert_unimplemented(); } @@ -224,6 +236,16 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt(" -> CR{}", inst.crRegister); } } + else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true); + } else if (inst.type == PPCREC_IML_TYPE_COMPARE) { strOutput.add("CMP "); @@ -270,6 +292,17 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt(" -> CR{}", inst.crRegister); } } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true); + } else if 
(inst.type == PPCREC_IML_TYPE_R_S32) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index d4cfdcb12..52e19e8c5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -10,6 +10,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg2 = -1; registersUsed->readNamedReg3 = -1; registersUsed->writtenNamedReg1 = -1; + registersUsed->writtenNamedReg2 = -1; registersUsed->readFPR1 = -1; registersUsed->readFPR2 = -1; registersUsed->readFPR3 = -1; @@ -34,10 +35,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if ( operation == PPCREC_IML_OP_OR || operation == PPCREC_IML_OP_AND || - operation == PPCREC_IML_OP_XOR || - operation == PPCREC_IML_OP_ADD_CARRY || // r_r carry stuff is deprecated - operation == PPCREC_IML_OP_ADD_CARRY_ME || - operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY) + operation == PPCREC_IML_OP_XOR) { // result is read and written, operand is read registersUsed->writtenNamedReg1 = op_r_r.registerResult; @@ -112,6 +110,24 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg1 = op_r_r_s32.registerA; } } + else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + registersUsed->writtenNamedReg1 = op_r_r_s32_carry.regR; + registersUsed->readNamedReg1 = op_r_r_s32_carry.regA; + // some operations read carry + switch (operation) + { + case PPCREC_IML_OP_ADD_WITH_CARRY: + registersUsed->readNamedReg2 = op_r_r_s32_carry.regCarry; + break; + case PPCREC_IML_OP_ADD: + break; + default: + cemu_assert_unimplemented(); + } + // carry is always written + registersUsed->writtenNamedReg2 = op_r_r_s32_carry.regCarry; + } else if (type == PPCREC_IML_TYPE_R_R_R) { // in all cases result is written and other operands are read only @@ -119,6 +135,25 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg1 = op_r_r_r.registerA; registersUsed->readNamedReg2 = op_r_r_r.registerB; } + else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + registersUsed->writtenNamedReg1 = op_r_r_r_carry.regR; + registersUsed->readNamedReg1 = op_r_r_r_carry.regA; + registersUsed->readNamedReg2 = op_r_r_r_carry.regB; + // some operations read carry + switch (operation) + { + case PPCREC_IML_OP_ADD_WITH_CARRY: + registersUsed->readNamedReg3 = op_r_r_r_carry.regCarry; + break; + case PPCREC_IML_OP_ADD: + break; + default: + cemu_assert_unimplemented(); + } + // carry is always written + registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry; + } else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // no effect on registers @@ -155,6 +190,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { registersUsed->readNamedReg1 = op_conditionalJump2.registerBool; } + else if (type == PPCREC_IML_TYPE_JUMP) + { + // no registers affected + } else if (type == PPCREC_IML_TYPE_LOAD) { registersUsed->writtenNamedReg1 = op_storeLoad.registerData; @@ -215,6 +254,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same registersUsed->readFPR4 = op_storeLoad.registerData; + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; case 
PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: @@ -227,6 +267,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; default: cemu_assert_unimplemented(); @@ -251,6 +292,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); registersUsed->readFPR4 = op_storeLoad.registerData; break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: @@ -263,6 +305,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; default: cemu_assert_unimplemented(); @@ -283,6 +326,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg2 = op_storeLoad.registerGQR; break; default: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; } } @@ -304,6 +348,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg3 = op_storeLoad.registerGQR; break; default: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; } } @@ -430,8 +475,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const #define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) +sint32 replaceRegisterMultiple(sint32 reg, const std::unordered_map& translationTable) +{ + const auto& it = translationTable.find(reg); + cemu_assert_debug(it != translationTable.cend()); + return it->second; +} + sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) { + // deprecated but still used for FPRs for (sint32 i = 0; i < 4; i++) { if (match[i] < 0) @@ -444,56 +497,70 @@ sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) return reg; } -void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) +//void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) +void IMLInstruction::RewriteGPR(const std::unordered_map& translationTable) { if (type == PPCREC_IML_TYPE_R_NAME) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable); } else if (type == PPCREC_IML_TYPE_NAME_R) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable); } else if (type == PPCREC_IML_TYPE_R_R) { - op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); + op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, translationTable); + op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, translationTable); } else if (type == PPCREC_IML_TYPE_R_S32) { 
- op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_S32) { - op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced); + op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, translationTable); + op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + op_r_r_s32_carry.regR = replaceRegisterMultiple(op_r_r_s32_carry.regR, translationTable); + op_r_r_s32_carry.regA = replaceRegisterMultiple(op_r_r_s32_carry.regA, translationTable); + op_r_r_s32_carry.regCarry = replaceRegisterMultiple(op_r_r_s32_carry.regCarry, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_R) { - op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); - op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced); + op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, translationTable); + op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, translationTable); + op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + op_r_r_r_carry.regR = replaceRegisterMultiple(op_r_r_r_carry.regR, translationTable); + op_r_r_r_carry.regA = replaceRegisterMultiple(op_r_r_r_carry.regA, translationTable); + op_r_r_r_carry.regB = replaceRegisterMultiple(op_r_r_r_carry.regB, translationTable); + op_r_r_r_carry.regCarry = replaceRegisterMultiple(op_r_r_r_carry.regCarry, translationTable); } else if (type == PPCREC_IML_TYPE_COMPARE) { - op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, gprRegisterSearched, gprRegisterReplaced); - op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, gprRegisterSearched, gprRegisterReplaced); + op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, translationTable); + op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, translationTable); + op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, translationTable); } else if (type == PPCREC_IML_TYPE_COMPARE_S32) { - op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, gprRegisterSearched, gprRegisterReplaced); + op_compare_s32.registerResult = 
replaceRegisterMultiple(op_compare_s32.registerResult, translationTable); + op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, gprRegisterSearched, gprRegisterReplaced); + op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable); } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) { // no effect on registers } @@ -509,7 +576,7 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste } else if (operation == PPCREC_IML_MACRO_B_TO_REG) { - op_macro.param = replaceRegisterMultiple(op_macro.param, gprRegisterSearched, gprRegisterReplaced); + op_macro.param = replaceRegisterMultiple(op_macro.param, translationTable); } else { @@ -518,33 +585,33 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste } else if (type == PPCREC_IML_TYPE_LOAD) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } } else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } else if (type == PPCREC_IML_TYPE_STORE) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } else if (type == PPCREC_IML_TYPE_STORE_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, 
gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } else if (type == PPCREC_IML_TYPE_CR) { @@ -562,52 +629,52 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_STORE) { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type 
== PPCREC_IML_TYPE_FPR_R_R) @@ -654,7 +721,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP) { // not affected } @@ -760,15 +827,15 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == PPCREC_IML_TYPE_R_R_S32) + else if (type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_R_R_S32_CARRY) { // not affected } - else if (type == PPCREC_IML_TYPE_R_R_R) + else if (type == PPCREC_IML_TYPE_R_R_R || type == PPCREC_IML_TYPE_R_R_R_CARRY) { // not affected } - else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP) { // not affected } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 35db10a1c..9491136e3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -19,14 +19,13 @@ enum PPCREC_IML_OP_XOR, // '^' operator PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator PPCREC_IML_OP_LEFT_SHIFT, // shift left operator - PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned) + PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned) + PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed) // ppc PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask) - PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag) PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, - PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry) PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 PPCREC_IML_OP_MFCR, // copy cr to gpr PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) @@ -83,7 +82,7 @@ enum // R_R_S32 only // R_R_R + R_R_S32 - PPCREC_IML_OP_ADD, + PPCREC_IML_OP_ADD, // also R_R_R_CARRY PPCREC_IML_OP_SUB, // R_R only @@ -92,14 +91,10 @@ enum PPCREC_IML_OP_ASSIGN_S16_TO_S32, PPCREC_IML_OP_ASSIGN_S8_TO_S32, - // deprecated - PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit - PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit - PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit - PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag - PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag - + // R_R_R_carry + PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1) }; + #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) enum @@ -116,7 +111,7 @@ enum PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak }; -enum +enum // deprecated condition codes { PPCREC_JUMP_CONDITION_NONE, PPCREC_JUMP_CONDITION_E, // equal / zero @@ -158,7 +153,9 @@ enum PPCREC_IML_TYPE_NO_OP, // no-op instruction 
PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r) PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* + PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry) PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* + PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry) PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*] PPCREC_IML_TYPE_STORE, // [r*+s32*] = r* @@ -174,6 +171,7 @@ enum // new style of handling conditions and branches: PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm + PPCREC_IML_TYPE_JUMP, // replaces CJUMP. Jump always, no condition PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. Jump condition is based on boolean register // conditional @@ -199,6 +197,7 @@ enum PPCREC_NAME_SPR0 = 3000, PPCREC_NAME_FPR0 = 4000, PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 + PPCREC_NAME_XER_CA = 6000, // carry bit }; // special cases for LOAD/STORE @@ -260,8 +259,8 @@ struct IMLUsedRegisters sint16 readNamedReg2; sint16 readNamedReg3; sint16 writtenNamedReg1; + sint16 writtenNamedReg2; }; - sint16 gpr[4]; // 3 read + 1 write }; // FPR union @@ -275,10 +274,69 @@ struct IMLUsedRegisters sint16 readFPR4; sint16 writtenFPR1; }; - sint16 fpr[4]; + //sint16 fpr[4]; }; + + bool IsRegWritten(sint16 imlReg) const // GPRs + { + cemu_assert_debug(imlReg >= 0); + return writtenNamedReg1 == imlReg || writtenNamedReg2 == imlReg; + } + + template + void ForEachWrittenGPR(Fn F) + { + if (writtenNamedReg1 >= 0) + F(writtenNamedReg1); + if (writtenNamedReg2 >= 0) + F(writtenNamedReg2); + } + + template + void ForEachReadGPR(Fn F) + { + if (readNamedReg1 >= 0) + F(readNamedReg1); + if (readNamedReg2 >= 0) + F(readNamedReg2); + if (readNamedReg3 >= 0) + F(readNamedReg3); + } + + template + void ForEachAccessedGPR(Fn F) + { + if (readNamedReg1 >= 0) + F(readNamedReg1, false); + if (readNamedReg2 >= 0) + F(readNamedReg2, false); + if (readNamedReg3 >= 0) + F(readNamedReg3, false); + if (writtenNamedReg1 >= 0) + F(writtenNamedReg1, true); + if (writtenNamedReg2 >= 0) + F(writtenNamedReg2, true); + } + + bool HasFPRReg(sint16 imlReg) const + { + cemu_assert_debug(imlReg >= 0); + if (readFPR1 == imlReg) + return true; + if (readFPR2 == imlReg) + return true; + if (readFPR3 == imlReg) + return true; + if (readFPR4 == imlReg) + return true; + if (writtenFPR1 == imlReg) + return true; + return false; + } }; +using IMLReg = uint8; + struct IMLInstruction { uint8 type; @@ -307,12 +365,25 @@ struct IMLInstruction }op_r_r_r; struct { - // R = A (op) immS32 [update cr* in mode *] + IMLReg regR; + IMLReg regA; + IMLReg regB; + IMLReg regCarry; + }op_r_r_r_carry; + struct + { uint8 registerResult; uint8 registerA; sint32 immS32; }op_r_r_s32; struct + { + IMLReg regR; + IMLReg regA; + sint32 immS32; + IMLReg regCarry; + }op_r_r_s32_carry; + struct { // R/F = NAME or NAME = R/F uint8 registerIndex; @@ -426,6 +497,7 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || + type == PPCREC_IML_TYPE_JUMP || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) return true; return false; @@ -496,6 +568,18 @@ struct IMLInstruction this->op_r_r_r.registerB = registerB; } + void make_r_r_r_carry(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 registerCarry) + { + this->type = PPCREC_IML_TYPE_R_R_R_CARRY; + this->operation = operation; + this->crRegister = 0xFF; + 
this->crMode = 0xFF; + this->op_r_r_r_carry.regR = registerResult; + this->op_r_r_r_carry.regA = registerA; + this->op_r_r_r_carry.regB = registerB; + this->op_r_r_r_carry.regCarry = registerCarry; + } + void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) { // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234") @@ -508,6 +592,18 @@ struct IMLInstruction this->op_r_r_s32.immS32 = immS32; } + void make_r_r_s32_carry(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 registerCarry) + { + this->type = PPCREC_IML_TYPE_R_R_S32_CARRY; + this->operation = operation; + this->crRegister = 0xFF; + this->crMode = 0xFF; + this->op_r_r_s32_carry.regR = registerResult; + this->op_r_r_s32_carry.regA = registerA; + this->op_r_r_s32_carry.immS32 = immS32; + this->op_r_r_s32_carry.regCarry = registerCarry; + } + void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond) { this->type = PPCREC_IML_TYPE_COMPARE; @@ -542,6 +638,14 @@ struct IMLInstruction this->op_conditionalJump2.mustBeTrue = mustBeTrue; } + void make_jump_new() + { + this->type = PPCREC_IML_TYPE_JUMP; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + } + // load from memory void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { @@ -572,7 +676,8 @@ struct IMLInstruction void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; - void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); + //void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); + void RewriteGPR(const std::unordered_map& translationTable); void ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]); void ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced); }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 2cbcb0c1d..f67b49e15 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -179,15 +179,7 @@ ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegist if( (unloadLockedMask&(1<currentMapping[i].virtualReg; - bool isReserved = false; - for (sint32 f = 0; f < 4; f++) - { - if (virtualReg == (sint32)instructionUsedRegisters->fpr[f]) - { - isReserved = true; - break; - } - } + bool isReserved = instructionUsedRegisters->HasFPRReg(virtualReg); if (isReserved) continue; if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) @@ -373,7 +365,7 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG imlInstruction->CheckRegisterUsage(®istersUsed); if( registersUsed.readNamedReg1 == registerIndex || registersUsed.readNamedReg2 == registerIndex || registersUsed.readNamedReg3 == registerIndex ) return false; - if( registersUsed.writtenNamedReg1 == registerIndex ) + if (registersUsed.IsRegWritten(registerIndex)) return true; } // todo: Scan next segment(s) @@ -411,7 +403,7 @@ bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcIml IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; IMLUsedRegisters registersUsed; imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.writtenNamedReg1 == registerIndex ) 
+ if( registersUsed.IsRegWritten(registerIndex) ) { if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) return true; @@ -620,84 +612,84 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext } } -bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) -{ - IMLUsedRegisters registersUsed; - for (sint32 i = startIndex; i <= endIndex; i++) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - imlInstruction->CheckRegisterUsage(&registersUsed); - if (registersUsed.writtenNamedReg1 == vreg) - return true; - } - return false; -} +//bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) +//{ +// IMLUsedRegisters registersUsed; +// for (sint32 i = startIndex; i <= endIndex; i++) +// { +// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; +// imlInstruction->CheckRegisterUsage(&registersUsed); +// if (registersUsed.IsRegWritten(vreg)) +// return true; +// } +// return false; +//} -sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name) -{ - // current segment - sint32 currentIndex = startIndex; - IMLSegment* currentSegment = startSegment; - sint32 segmentIterateCount = 0; - sint32 foundRegister = -1; - while (true) - { - // stop scanning if segment is enterable - if (currentSegment->isEnterable) - return -1; - while (currentIndex >= 0) - { - if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) - { - foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; - break; - } - // previous instruction - currentIndex--; - } - if (foundRegister >= 0) - break; - // continue at previous segment (if there is only one) - if (segmentIterateCount >= 1) - return -1; - if (currentSegment->list_prevSegments.size() != 1) - return -1; - currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlList.size() - 1; - segmentIterateCount++; - } - // scan again to make sure the register is not modified inbetween - currentIndex = startIndex; - currentSegment = startSegment; - segmentIterateCount = 0; - IMLUsedRegisters registersUsed; - while (true) - { - while (currentIndex >= 0) - { - // check if register is modified - currentSegment->imlList[currentIndex].CheckRegisterUsage(&registersUsed); - if (registersUsed.writtenNamedReg1 == foundRegister) - return -1; - // check if end of scan reached - if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) - { - return foundRegister; - } - // previous instruction - currentIndex--; - } - // continue at previous segment (if there is only one) - if (segmentIterateCount >= 1) - return -1; - if (currentSegment->list_prevSegments.size() != 1) - return -1; - currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlList.size() - 1; - segmentIterateCount++; - } - return -1; -} +//sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name) +//{ +// // current segment +// sint32 currentIndex = startIndex; +// IMLSegment* currentSegment = startSegment; +// sint32 segmentIterateCount = 0; +// sint32 foundRegister = -1; +// while (true) +// { +// // stop scanning if segment is enterable +// if (currentSegment->isEnterable) +// return -1; +// while (currentIndex >= 0) +// { +// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) +// { +// foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; +// break; +// } +// // previous instruction +// currentIndex--; +// } +// if (foundRegister >= 0) +// break; +// // continue at previous segment (if there is only one) +// if (segmentIterateCount >= 1) +// return -1; +// if (currentSegment->list_prevSegments.size() != 1) +// return -1; +// currentSegment = currentSegment->list_prevSegments[0]; +// currentIndex = currentSegment->imlList.size() - 1; +// segmentIterateCount++; +// } +// // scan again to make sure the register is not modified inbetween +// currentIndex = startIndex; +// currentSegment = startSegment; +// segmentIterateCount = 0; +// IMLUsedRegisters registersUsed; +// while (true) +// { +// while (currentIndex >= 0) +// { +// // check if register is modified +// currentSegment->imlList[currentIndex].CheckRegisterUsage(&registersUsed); +// if (registersUsed.IsRegWritten(foundRegister)) +// return -1; +// // check if end of scan reached +// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) +// { +// return foundRegister; +// } +// // previous instruction +// currentIndex--; +// } +// // continue at previous segment (if there is only one) +// if (segmentIterateCount >= 1) +// return -1; +// if (currentSegment->list_prevSegments.size() != 1) +// return -1; +// currentSegment = currentSegment->list_prevSegments[0]; +// currentIndex = currentSegment->imlList.size() - 1; +// segmentIterateCount++; +// } +// return -1; +//} void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) { @@ -830,7 +822,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp { break; } - if (registersUsed.writtenNamedReg1 == gprIndex) + if (registersUsed.IsRegWritten(gprIndex)) return; // GPR overwritten, we don't need to byte swap anymore } if (foundMatch) @@ -933,6 +925,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; + if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) { @@ -946,6 +940,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; + if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } } else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { @@ -978,6 +974,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; + if
(instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) { @@ -991,127 +989,129 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; + if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } } } } } -/* - * Returns true if registerWrite overwrites any of the registers read by registerRead - */ -bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite) -{ - if (registerWrite->writtenNamedReg1 < 0) - return false; - - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1) - return true; - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2) - return true; - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3) - return true; - return false; -} +///* +// * Returns true if registerWrite overwrites any of the registers read by registerRead +// */ +//bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite) +//{ +// if (registerWrite->writtenNamedReg1 < 0) +// return false; +// +// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1) +// return true; +// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2) +// return true; +// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3) +// return true; +// return false; +//} void _reorderConditionModifyInstructions(IMLSegment* imlSegment) { - IMLInstruction* lastInstruction = imlSegment->GetLastInstruction(); - // last instruction is a conditional branch? - if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) - return; - if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) - return; - // get CR bitmask of bit required for conditional jump - PPCRecCRTracking_t crTracking; - IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking); - uint32 requiredCRBits = crTracking.readCRBits; - - // scan backwards until we find the instruction that sets the CR - sint32 crSetterInstructionIndex = -1; - sint32 unsafeInstructionIndex = -1; - for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking); - if (crTracking.readCRBits != 0) - return; // dont handle complex cases for now - if (crTracking.writtenCRBits != 0) - { - if ((crTracking.writtenCRBits&requiredCRBits) != 0) - { - crSetterInstructionIndex = i; - break; - } - else - { - return; // other CR bits overwritten (dont handle complex cases) - } - } - // is safe? 
(no risk of overwriting x64 eflags) - if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) || - (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) || - (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) || - (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) - continue; - // not safe - if (unsafeInstructionIndex == -1) - unsafeInstructionIndex = i; - } - if (crSetterInstructionIndex < 0) - return; - if (unsafeInstructionIndex < 0) - return; // no danger of overwriting eflags, don't reorder - // check if we can move the CR setter instruction to after unsafeInstructionIndex - PPCRecCRTracking_t crTrackingSetter = crTracking; - IMLUsedRegisters regTrackingCRSetter; - imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(&regTrackingCRSetter); - if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) - return; // we don't handle FPR dependency yet so just ignore FPR instructions - IMLUsedRegisters registerTracking; - if (regTrackingCRSetter.writtenNamedReg1 >= 0) - { - // CR setter does write GPR - for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) - { - imlSegment->imlList[i].CheckRegisterUsage(&registerTracking); - // reads register written by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(&registerTracking, &regTrackingCRSetter)) - { - return; // cant move CR setter because of dependency - } - // writes register read by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking)) - { - return; // cant move CR setter because of dependency - } - // overwrites register written by CR setter? - if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1) - return; - } - } - else - { - // CR setter does not write GPR - for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) - { - imlSegment->imlList[i].CheckRegisterUsage(&registerTracking); - // writes register read by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking)) - { - return; // cant move CR setter because of dependency - } - } - } - - // move CR setter instruction -#ifdef CEMU_DEBUG_ASSERT - if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) - assert_dbg(); -#endif - IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); - memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); - imlSegment->imlList[crSetterInstructionIndex].make_no_op(); +// IMLInstruction* lastInstruction = imlSegment->GetLastInstruction(); +// // last instruction is a conditional branch? +// if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) +// return; +// if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) +// return; +// // get CR bitmask of bit required for conditional jump +// PPCRecCRTracking_t crTracking; +// IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking); +// uint32 requiredCRBits = crTracking.readCRBits; +// +// // scan backwards until we find the instruction that sets the CR +// sint32 crSetterInstructionIndex = -1; +// sint32 unsafeInstructionIndex = -1; +// for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) +// { +// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; +// IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking); +// if (crTracking.readCRBits != 0) +// return; // dont handle complex cases for now +// if (crTracking.writtenCRBits != 0) +// { +// if ((crTracking.writtenCRBits&requiredCRBits) != 0) +// { +// crSetterInstructionIndex = i; +// break; +// } +// else +// { +// return; // other CR bits overwritten (dont handle complex cases) +// } +// } +// // is safe? (no risk of overwriting x64 eflags) +// if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) || +// (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) || +// (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) || +// (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) +// continue; +// // not safe +// if (unsafeInstructionIndex == -1) +// unsafeInstructionIndex = i; +// } +// if (crSetterInstructionIndex < 0) +// return; +// if (unsafeInstructionIndex < 0) +// return; // no danger of overwriting eflags, don't reorder +// // check if we can move the CR setter instruction to after unsafeInstructionIndex +// PPCRecCRTracking_t crTrackingSetter = crTracking; +// IMLUsedRegisters regTrackingCRSetter; +// imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(&regTrackingCRSetter); +// if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) +// return; // we don't handle FPR dependency yet so just ignore FPR instructions +// IMLUsedRegisters registerTracking; +// if (regTrackingCRSetter.writtenNamedReg1 >= 0) +// { +// // CR setter does write GPR +// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) +// { +// imlSegment->imlList[i].CheckRegisterUsage(&registerTracking); +// // reads register written by CR setter? +// if (PPCRecompilerAnalyzer_checkForGPROverwrite(&registerTracking, &regTrackingCRSetter)) +// { +// return; // cant move CR setter because of dependency +// } +// // writes register read by CR setter? +// if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking)) +// { +// return; // cant move CR setter because of dependency +// } +// // overwrites register written by CR setter? +// if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1) +// return; +// } +// } +// else +// { +// // CR setter does not write GPR +// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) +// { +// imlSegment->imlList[i].CheckRegisterUsage(&registerTracking); +// // writes register read by CR setter? +// if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking)) +// { +// return; // cant move CR setter because of dependency +// } +// } +// } +// +// // move CR setter instruction +//#ifdef CEMU_DEBUG_ASSERT +// if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) +// assert_dbg(); +//#endif +// IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); +// memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); +// imlSegment->imlList[crSetterInstructionIndex].make_no_op(); } /* diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 8c7c807d4..98ca687b5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -764,12 +764,11 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) virtualReg2PhysReg[i] = -1; - + std::unordered_map virt2PhysRegMap; // key = virtual register, value = physical register raLiveRangeInfo_t liveInfo; liveInfo.liveRangesCount = 0; sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; - //sint32 suffixInstructionIndex = imlSegment->imlList.size() - suffixInstructionCount; // if no suffix instruction exists this matches instruction count // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) @@ -789,6 +788,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, assert_dbg(); #endif virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; + virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -806,6 +806,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1) assert_dbg(); virtualReg2PhysReg[liverange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(liverange->range->virtualRegister); // store GPR if required // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (liverange->hasStore) @@ -844,37 +845,13 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // update translation table cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; + virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } // rewrite registers - // todo - this can be simplified by using a map or lookup table rather than a check + 4 slot translation table if (index < imlSegment->imlList.size()) - { - IMLUsedRegisters gprTracking; - imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - - sint32 inputGpr[4]; - inputGpr[0] = gprTracking.gpr[0]; - inputGpr[1] = gprTracking.gpr[1]; - inputGpr[2] = gprTracking.gpr[2]; - inputGpr[3] = gprTracking.gpr[3]; - sint32
replaceGpr[4]; - for (sint32 f = 0; f < 4; f++) - { - sint32 virtualRegister = gprTracking.gpr[f]; - if (virtualRegister < 0) - { - replaceGpr[f] = -1; - continue; - } - if (virtualRegister >= IML_RA_VIRT_REG_COUNT_MAX) - assert_dbg(); - replaceGpr[f] = virtualReg2PhysReg[virtualRegister]; - cemu_assert_debug(replaceGpr[f] >= 0); - } - imlSegment->imlList[index].ReplaceGPR(inputGpr, replaceGpr); - } + imlSegment->imlList[index].RewriteGPR(virt2PhysRegMap); // next iml instruction index++; } @@ -889,6 +866,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // update translation table cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); virtualReg2PhysReg[liverange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(liverange->range->virtualRegister); // store GPR if (liverange->hasStore) { @@ -929,6 +907,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // update translation table cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; + virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -1039,21 +1018,12 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { - // end loop at suffix instruction - //if (imlSegment->imlList[index].IsSuffixInstruction()) - // break; - // get accessed GPRs imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - cemu_assert_debug(virtualRegister < IML_RA_VIRT_REG_COUNT_MAX); - imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction - } - // next instruction + gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) { + cemu_assert_debug(gprId < IML_RA_VIRT_REG_COUNT_MAX); + imlSegment->raDistances.reg[gprId].usageStart = std::min(imlSegment->raDistances.reg[gprId].usageStart, index); // index before/at instruction + imlSegment->raDistances.reg[gprId].usageEnd = std::max(imlSegment->raDistances.reg[gprId].usageEnd, index + 1); // index after instruction + }); index++; } } @@ -1141,29 +1111,17 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { - // we parse suffix instructions too for any potential input registers (writes not allowed), but note that any spills/stores need to happen before the suffix instruction - //// end loop at suffix instruction - //if (imlSegment->imlList[index].IsSuffixInstruction()) - // break; - // get accessed GPRs imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - // handle accessed GPR - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - bool isWrite = (t == 3); + gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) { // add location - 
PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); + PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT - if ((sint32)index < vGPR2Subrange[virtualRegister]->start.index) - assert_dbg(); - if ((sint32)index + 1 > vGPR2Subrange[virtualRegister]->end.index) - assert_dbg(); + if ((sint32)index < vGPR2Subrange[gprId]->start.index) + assert_dbg(); + if ((sint32)index + 1 > vGPR2Subrange[gprId]->end.index) + assert_dbg(); #endif - } - // next instruction + }); index++; } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index ed8bee876..63fb5f725 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -167,13 +167,11 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } - uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr(); uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr(); if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0) { - if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr) { delete ppcRecFunc; @@ -188,11 +186,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } - //if (ppcRecFunc->ppcAddress == 0x12345678) + //if (ppcRecFunc->ppcAddress == 0x11223344) + //{ + // //debug_printf("----------------------------------------\n"); + // //IMLDebug_Dump(&ppcImlGenContext); + // //__debugbreak(); + //} + //else //{ - // debug_printf("----------------------------------------\n"); - // IMLDebug_Dump(&ppcImlGenContext); - // __debugbreak(); + // delete ppcRecFunc; + // return nullptr; //} // Large functions for testing (botw): diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index bd2c02d8f..5a4484dac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -109,6 +109,14 @@ struct ppcImlGenContext_t segmentList2.insert(segmentList2.begin() + index, 1, newSeg); return newSeg; } + + std::span InsertSegments(size_t index, size_t count) + { + segmentList2.insert(segmentList2.begin() + index, count, {}); + for (size_t i = index; i < (index + count); i++) + segmentList2[i] = new IMLSegment(); + return { segmentList2.data() + index, count}; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 024b6b860..f7492e590 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -179,6 +179,39 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } + +// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards +template +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken) +{ + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 3); + IMLSegment* segBranchNotTaken 
= segments[0]; + IMLSegment* segBranchTaken = segments[1]; + IMLSegment* segMerge = segments[2]; + + // link the segments + segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(segBranchTaken); + currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken); + segBranchTaken->SetLinkBranchNotTaken(segMerge); + segBranchNotTaken->SetLinkBranchTaken(segMerge); + // generate code for branch taken segment + ppcImlGenContext.currentOutputSegment = segBranchTaken; + genSegmentBranchTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchTaken); + // generate code for branch not taken segment + ppcImlGenContext.currentOutputSegment = segBranchNotTaken; + genSegmentBranchNotTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken); + ppcImlGenContext.emitInst().make_jump_new(); + // make merge segment the new write segment + ppcImlGenContext.currentOutputSegment = segMerge; + basicBlockInfo.appendSegment = segMerge; +} + uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { if( mappedName == PPCREC_NAME_NONE ) @@ -782,96 +815,24 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return true; } -bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; -> Update carry - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - -bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB] + ca; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - -bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - //uint32 a = hCPU->gpr[rA]; - //uint32 ca = hCPU->xer_ca; - //hCPU->gpr[rD] = a + ca; - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - // move rA to rD - if( registerRA != registerRD ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); - } - 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - -bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - //uint32 a = hCPU->gpr[rA]; - //uint32 ca = hCPU->xer_ca; - //hCPU->gpr[rD] = a + ca + -1; - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - // move rA to rD - if( registerRA != registerRD ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); - } - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); //hCPU->gpr[rD] = (rA ? (int)hCPU->gpr[rA] : 0) + (int)imm; - if( rA != 0 ) + if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, imm); } else { // rA not used, instruction is value assignment // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, imm); } // never updates any cr @@ -883,48 +844,93 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rD, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm); - if( rA != 0 ) + if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm); } else { // rA not used, instruction turns into simple value assignment // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; } -bool PPCRecompilerImlGen_ADDIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool 
PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 rD, rA; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // rD = rA + imm; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); - // never updates any cr + // r = a + b -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD, regRD, regRA, regRB, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); return true; } -bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool updateCR0) { - // this opcode is identical to ADDIC but additionally it updates CR0 sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // rD = rA + imm; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm, regCa); + if(updateCR0) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); + return true; +} + +bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + b + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, regRB, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + return true; +} + +bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg 
regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, 0, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + return true; +} + +bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + 0xFFFFFFFF + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, -1, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); return true; } @@ -932,71 +938,80 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - // rD = rB - rA - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA); + // rD = ~rA + rB + 1 + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, regD, regB, regA); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + b + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + ca; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + 
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - if( rB != 0 ) - debugBreakpoint(); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, 0, regCa); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + b + 1; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - // rD = rB - rA - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUBFC, registerRD, registerRA, registerRB); - if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1 + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + imm + 1 sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //uint32 a = hCPU->gpr[rA]; - //hCPU->gpr[rD] = ~a + imm + 1; - // cr0 is never affected - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUBFC, registerRD, registerRA, imm); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, 
PPCREC_NAME_R0 + rA, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regTmp, (sint32)imm + 1, regCa); + // never affects CR0 return true; } @@ -1102,7 +1117,7 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc else if( SH == (32-MB) && ME == 31 ) { // SRWI - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, registerRA, registerRS, MB); } else { @@ -1152,14 +1167,45 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // unlike SRAWI, for SRAW the shift range is 0-63 (6 bits) + // but only shifts up to register bitwidth-1 are well defined in IML so this requires special handling for shifts >= 32 sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + + uint32 registerTmpShiftAmount = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 registerTmpCondBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + uint32 registerTmp1 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + uint32 registerTmp2 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + + // load masked shift factor into temporary register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmpShiftAmount, registerRB, 0x3F); + ppcImlGenContext->emitInst().make_compare_s32(registerTmpShiftAmount, 32, registerTmpCondBool, IMLCondition::UNSIGNED_GT); + ppcImlGenContext->emitInst().make_conditional_jump_new(registerTmpCondBool, true); + + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken */ + genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, registerTmpShiftAmount); + genCtx.emitInst().make_compare_s32(registerRA, 0, registerCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken, shift size below 32 */ + genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp1, registerRS, 31); // signMask = input >> 31 (arithmetic shift) + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerTmp2, 1); // shiftMask = 
((1<emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH); + if (SH == 0) + return false; // becomes a no-op but also sets ca bit to 0? + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS, false); + uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + uint32 registerTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // calculate CA first + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp, registerRS, 31); // signMask = input >> 31 (arithmetic shift) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp, registerTmp, registerRS); // testValue = input & signMask & ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmp, registerTmp, ((1 << SH) - 1)); + ppcImlGenContext->emitInst().make_compare_s32(registerTmp, 0, registerCarry, IMLCondition::NEQ); // ca = (testValue != 0) + // do the actual shift + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, (sint32)SH); + if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; @@ -1999,7 +2056,7 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpReg, dataRegister); sint32 shiftAmount = (3 - b) * 8; if (shiftAmount) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, tmpReg, tmpReg, shiftAmount); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, tmpReg, tmpReg, shiftAmount); ppcImlGenContext->emitInst().make_memory_r(tmpReg, memReg, memOffset + b, 8, false); nb--; if (nb == 0) @@ -2791,7 +2848,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode); break; case 8: // SUBFIC - PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; break; case 10: // CMPLI PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode); @@ -2800,11 +2858,11 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode); break; case 12: // ADDIC - if (PPCRecompilerImlGen_ADDIC(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; break; case 13: // ADDIC. 
- if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; break; case 14: // ADDI @@ -4010,36 +4068,6 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) } - - // insert cycle counter instruction in every segment that has a cycle count greater zero - //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - //{ - // if( segIt->ppcAddrMin == 0 ) - // continue; - // // count number of PPC instructions in segment - // // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions - // uint32 lastPPCInstAddr = 0; - // uint32 ppcCount2 = 0; - // for (sint32 i = 0; i < segIt->imlList.size(); i++) - // { - // if (segIt->imlList[i].associatedPPCAddress == 0) - // continue; - // if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) - // continue; - // lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; - // ppcCount2++; - // } - // //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions - // uint32 cycleCount = ppcCount2;// ppcCount / 4; - // if( cycleCount > 0 ) - // { - // PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); - // segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; - // segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - // segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - // segIt->imlList[0].op_macro.param = cycleCount; - // } - //} return true; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index c7f91ac26..95cfd176d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -49,7 +49,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = 0) +void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); From ff09940d0357e5d6aeff07c82b3a4e5e8315c1f3 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 28 Dec 2022 14:26:38 +0100 Subject: [PATCH 26/64] PPCRec: Avoid complex optimizations in backend It's better to do it in a lowering pass so that the backend code can be kept as simple as possible --- .../Recompiler/BackendX64/BackendX64.cpp | 218 +----------------- .../Recompiler/BackendX64/BackendX64.h | 5 - .../Recompiler/BackendX64/BackendX64FPU.cpp | 7 - .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 4 - .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 1 - .../Recompiler/PPCRecompilerImlGen.cpp | 145 ++---------- 6 files changed, 26 
insertions(+), 354 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index ed812b46f..aa0663109 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -57,24 +57,6 @@ void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContex x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo); } -/* -* Overwrites the currently cached (in x64 cf) cr* register -* Should be called before each x64 instruction which overwrites the current status flags (with mappedCRRegister set to PPCREC_CR_TEMPORARY unless explicitly set by PPC instruction) -*/ -void PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 mappedCRRegister, sint32 crState) -{ - x64GenContext->activeCRRegister = mappedCRRegister; - x64GenContext->activeCRState = crState; -} - -/* -* Reset cached cr* register without storing it first -*/ -void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext) -{ - x64GenContext->activeCRRegister = PPC_REC_INVALID_REGISTER; -} - void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset) { uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset; @@ -116,7 +98,6 @@ void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcIm if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); // todo: Set CR SO if XER SO bit is set - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, crRegister, PPCREC_CR_STATE_TYPE_LOGICAL); } void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId) @@ -153,7 +134,6 @@ void ATTR_MS_ABI PPCRecompiler_getTBU(PPCInterpreter_t* hCPU, uint32 gprIndex) bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) { uint32 branchDstReg = imlInstruction->op_macro.param; @@ -419,7 +399,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } else if( imlInstruction->op_storeLoad.copyWidth == 16 ) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // todo: We can avoid this if MOVBE is available if (indexed) { x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); @@ -445,8 +424,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } else if( imlInstruction->op_storeLoad.copyWidth == 8 ) { - if( indexed ) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // todo: Optimize by using only MOVZX/MOVSX if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); @@ -459,7 +436,6 @@ bool 
PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER ) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_storeLoad.immS32 != 0 ) assert_dbg(); // not supported if( indexed ) @@ -507,8 +483,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian; if (imlInstruction->op_storeLoad.copyWidth == 32) { - if (indexed) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); uint32 valueRegister; if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData) { @@ -532,8 +506,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->op_storeLoad.copyWidth == 16) { - if (indexed || swapEndian) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); if (swapEndian) x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); @@ -546,8 +518,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->op_storeLoad.copyWidth == 8) { - if (indexed) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (indexed && realRegisterMem == realRegisterData) { x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); @@ -561,10 +531,9 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (imlInstruction->op_storeLoad.immS32 != 0) assert_dbg(); // todo - // reset cr0 LT, GT and EQ + // reset cr0 LT, GT and EQ sint32 crRegister = 0; x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); @@ -650,19 +619,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); } else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( 
imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= registerA @@ -683,7 +649,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); @@ -692,7 +657,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); @@ -701,7 +665,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // count leading zeros - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) @@ -726,7 +689,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) { // registerA CMP registerB (arithmetic compare) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) { return false; // a NO-OP instruction @@ -735,11 +697,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { return false; } - // update state of cr register - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC); - else - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); // create compare instruction x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // set cr bits @@ -770,7 +727,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); @@ -810,30 +766,24 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { // registerResult &= 
immS32 cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= immS32 cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { // registerResult ^= immS32 cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // registerResult <<<= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( (imlInstruction->op_r_immS32.immS32&0x80) ) assert_dbg(); // should not happen @@ -842,7 +792,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) { // registerResult CMP immS32 (arithmetic compare) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) { debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): No-Op CMP found\n"); @@ -853,11 +802,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported CMP with crRegister = 8\n"); return false; } - // update state of cr register - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC); - else - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); // create compare instruction x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32); // set cr bits @@ -886,7 +830,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); for(sint32 f=0; f<32; f++) @@ -897,7 +840,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } else if 
(imlInstruction->operation == PPCREC_IML_OP_MTCRF) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); for (sint32 f = 0; f < 32; f++) @@ -928,66 +870,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR } x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); - uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; - if (imlInstruction->op_conditional_r_s32.crRegisterIndex == x64GenContext->activeCRRegister) - { - if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC) - { - if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC) - { - if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_LOGICAL) - { - if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? 
X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } - } - } - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); if (imlInstruction->op_conditional_r_s32.bitMustBeSet) x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); @@ -1002,10 +885,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( imlInstruction->operation == PPCREC_IML_OP_ADD) + if (imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1027,7 +909,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { // registerResult = registerOperand1 - registerOperand2 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1060,7 +941,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegA = imlInstruction->op_r_r_r.registerA; sint32 rRegB = imlInstruction->op_r_r_r.registerB; @@ -1080,7 +960,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand1 * registerOperand2 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1103,7 +982,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW ) { // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = 
imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1146,7 +1024,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1184,8 +1061,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // x86's shift and rotate instruction have the shift amount hardwired to the CL register // since our register allocator doesn't support instruction based fixed phys registers yet // we'll instead have to temporarily shuffle registers around @@ -1266,7 +1141,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1302,7 +1176,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1340,7 +1213,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_ORC ) { // registerResult = registerOperand1 | ~registerOperand2 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1361,8 +1233,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR); auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA); auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB); @@ -1399,7 +1269,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunc bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, 
x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); auto regR = _reg8(imlInstruction->op_compare.registerResult); auto regA = _reg32(imlInstruction->op_compare.registerOperandA); auto regB = _reg32(imlInstruction->op_compare.registerOperandB); @@ -1412,7 +1281,6 @@ bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); auto regR = _reg8(imlInstruction->op_compare_s32.registerResult); auto regA = _reg32(imlInstruction->op_compare_s32.registerOperandA); sint32 imm = imlInstruction->op_compare_s32.immS32; @@ -1425,7 +1293,6 @@ bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunc bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); auto regBool = _reg8(imlInstruction->op_conditionalJump2.registerBool); bool mustBeTrue = imlInstruction->op_conditionalJump2.mustBeTrue; x64GenContext->emitter->TEST_bb(regBool, regBool); @@ -1436,7 +1303,6 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64GenContext->emitter->JMP_j32(0); return true; @@ -1453,7 +1319,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; @@ -1464,7 +1329,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction else if (imlInstruction->operation == PPCREC_IML_OP_SUB) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); @@ -1474,7 +1338,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_XOR) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); if (imlInstruction->operation == PPCREC_IML_OP_AND) @@ 
-1487,7 +1350,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) { // registerResult = ((registerResult<<op_r_r_s32.immS32; uint32 mb = (vImm>>0)&0xFF; uint32 me = (vImm>>8)&0xFF; @@ -1510,7 +1372,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand * immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; @@ -1523,7 +1384,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); @@ -1544,8 +1404,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - auto regR = _reg32(imlInstruction->op_r_r_s32_carry.regR); auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA); sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32; @@ -1604,73 +1462,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec else { uint8 crBitIndex = imlInstruction->op_conditionalJump.crRegisterIndex*4 + imlInstruction->op_conditionalJump.crBitIndex; - if (imlInstruction->op_conditionalJump.crRegisterIndex == x64GenContext->activeCRRegister ) - { - if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC) - { - if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC) - { - if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_LOGICAL) - { - if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); - return true; - } - } - cemu_assert_debug(false); // should not reach? 
- } x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); cemu_assert_debug(imlSegment->GetBranchTaken()); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken()); @@ -1689,7 +1480,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // some tests (all performed on a i7-4790K) // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency) // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC @@ -1707,7 +1497,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction */ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // while these instruction do not directly affect eflags, they change the CR bit if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR) { // clear cr bit @@ -1848,7 +1637,6 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size) bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { x64GenContext_t x64GenContext{}; - x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // generate iml instruction code bool codeGenerationFailed = false; @@ -2104,7 +1892,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo void PPCRecompilerX64Gen_generateEnterRecompilerCode() { x64GenContext_t x64GenContext{}; - x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // start of recompiler entry function x64Gen_push_reg64(&x64GenContext, X86_REG_RAX); @@ -2180,7 +1967,6 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() { x64GenContext_t x64GenContext{}; - x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // update instruction pointer // LR is in EDX diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index b9cb0585e..1683c5b99 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -26,9 +26,6 @@ struct x64GenContext_t delete emitter; } - // cr state - sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register - sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned) // relocate offsets std::vector relocateOffsetTable2; }; @@ -75,8 +72,6 @@ enum bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext); - void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset); void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions(); diff --git 
a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index e50052d5a..14d05d5af 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -267,7 +267,6 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen // load from memory bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData; sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; @@ -578,7 +577,6 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe // store to memory bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData; sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; @@ -690,7 +688,6 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg) // FPR op FPR void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) { if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) @@ -969,8 +966,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction */ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) { if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) @@ -1062,7 +1057,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti */ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); @@ -1156,7 +1150,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc */ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) { 
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 63fb5f725..f74cd2259 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -226,12 +226,8 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return ppcRecFunc; } -void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext); - bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) { - PPCRecompiler_FixLoops(ppcImlGenContext); - // isolate entry points from function flow (enterable segments must not be the target of any other segment) // this simplifies logic during register allocation PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index a4dd41016..8377671a5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -25,7 +25,6 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe // IML instruction generation void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode); void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index f7492e590..435a5a7e6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -64,11 +64,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe ppcImlGenContext->emitInst().make_r_r(operation, registerResult, registerA, crRegister, crMode); } -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode) -{ - ppcImlGenContext->emitInst().make_r_s32(operation, registerIndex, immS32, crRegister, crMode); -} - void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name) { // Store name (e.g. 
"'r3' = t0" which translates to MOV [ESP+offset_r3], reg32) @@ -502,7 +497,7 @@ bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rD, rA, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0); return true; } @@ -512,7 +507,7 @@ bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 crMask; PPC_OPC_TEMPL_XFX(opcode, rS, crMask); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask); return true; } @@ -730,7 +725,7 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco branchDestReg = tmpRegister; } uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } if (!BO.decrementerIgnore()) @@ -856,7 +851,7 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // rA not used, instruction turns into simple value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm); } // never updates any cr return true; @@ -3970,19 +3965,8 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction return true; } -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) +void IMLOptimizer_replaceWithConditionalMov(ppcImlGenContext_t& ppcImlGenContext) { - ppcImlGenContext.boundaryTracker = &boundaryTracker; - if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) - return false; - - // set range - // todo - support non-continuous functions for the range tracking? 
- ppcRecRange_t recRange; - recRange.ppcAddress = ppcRecFunc->ppcAddress; - recRange.ppcSize = ppcRecFunc->ppcSize; - ppcRecFunc->list_ranges.push_back(recRange); - // optimization pass - replace segments with conditional MOVs if possible for (IMLSegment* segIt : ppcImlGenContext.segmentList2) { @@ -4006,16 +3990,16 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) { IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; - if( imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) continue; // todo: Register to register copy canReduceSegment = false; break; } - if( canReduceSegment == false ) + if (canReduceSegment == false) continue; - + // remove the branch instruction uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; @@ -4068,104 +4052,23 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) } - return true; } -void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext) +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) { - return; // deprecated + ppcImlGenContext.boundaryTracker = &boundaryTracker; + if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) + return false; - //// find segments that have a (conditional) jump instruction that points in reverse direction of code flow - //// for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. - //// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) - //uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located - //for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++) - //{ - // // todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) 
- // IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; - // if (imlSegment->imlList.empty()) - // continue; - // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) - // continue; - // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) - // continue; - - // // exclude non-infinite tight loops - // if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) - // continue; - // // potential loop segment found, split this segment into four: - // // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) - // // P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here. - // // P2: This segment contains the iml instructions of the original segment - // // PEntry: This segment is used to enter the function, it jumps to P0 - // // All segments are considered to be part of the same PPC instruction range - // // The first segment also retains the jump destination and enterable properties from the original segment. - // //debug_printf("--- Insert cycle counter check ---\n"); - - // PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); - // imlSegment = NULL; - // IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; - // IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; - // IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; - // // create entry point segment - // PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); - // IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; - // // relink segments - // IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); - // IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - // IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); - // IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); - // // update segments - // uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; - // if (imlSegmentP2->isEnterable) - // enterPPCAddress = imlSegmentP2->enterPPCAddress; - // imlSegmentP0->ppcAddress = 0xFFFFFFFF; - // imlSegmentP1->ppcAddress = 0xFFFFFFFF; - // imlSegmentP2->ppcAddress = 0xFFFFFFFF; - // cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); - // // move segment properties from segment P2 to segment P0 - // imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; - // imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; - // imlSegmentP0->isEnterable = false; - // //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; - // imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; - // imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; - // imlSegmentP2->isJumpDestination = false; - // imlSegmentP2->jumpDestinationPPCAddress = 0; - // imlSegmentP2->isEnterable = false; - // imlSegmentP2->enterPPCAddress = 0; - // imlSegmentP2->ppcAddrMin = 0; - // imlSegmentP2->ppcAddrMax = 0; - // // setup enterable segment - // if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) - // { - // imlSegmentPEntry->isEnterable = true; - // imlSegmentPEntry->ppcAddress = enterPPCAddress; - // 
imlSegmentPEntry->enterPPCAddress = enterPPCAddress; - // } - // // assign new jumpmark to segment P2 - // imlSegmentP2->isJumpDestination = true; - // imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; - // currentLoopEscapeJumpMarker++; - // // create ppc_leave instruction in segment P1 - // PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); - // imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; - // imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; - // imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - // imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; - // imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // // create cycle-based conditional instruction in segment P0 - // PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); - // imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - // imlSegmentP0->imlList[0].operation = 0; - // imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - // imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; - // imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // // jump instruction for PEntry - // PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); - // PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); - - // // skip the newly created segments - // s += 2; - //} -} \ No newline at end of file + // IMLOptimizer_replaceWithConditionalMov(ppcImlGenContext); + + // set range + // todo - support non-continuous functions for the range tracking? + ppcRecRange_t recRange; + recRange.ppcAddress = ppcRecFunc->ppcAddress; + recRange.ppcSize = ppcRecFunc->ppcSize; + ppcRecFunc->list_ranges.push_back(recRange); + + + return true; +} From c4b9fff24ae9392453a0087e208211ae9e39d4e9 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 3 Jan 2023 00:51:27 +0100 Subject: [PATCH 27/64] PPCRec: Rework CR bit handling CR bits are now resident in registers instead of being baked into the instruction definitions. Same for XER SO, and LWARX reservation EA and value. Reworked LWARX/STWCX, CRxx ops, compare and branch instructions. As well as RC bit handling. Not all CR-related instructions are reimplemented yet. 
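A minimal sketch of the idea, assuming plain C++ stand-ins rather than the recompiler's real IML types: with CR bits held in ordinary boolean registers, a compare simply produces boolean values and a conditional branch simply tests one, which is what lets the backend drop its cached-flags bookkeeping.

    #include <cstdint>

    // Illustrative model only: CR bits as ordinary boolean registers.
    struct CrBits { uint8_t lt, gt, eq, so; };

    // cmpw: each CR bit is computed as a plain boolean value
    // (backend: CMP + SETcc into the bit's register).
    static CrBits emulateCmpw(int32_t a, int32_t b, uint8_t xerSo)
    {
        CrBits cr{};
        cr.lt = a < b;
        cr.gt = a > b;
        cr.eq = a == b;
        cr.so = xerSo; // summary overflow is copied from XER SO, not recomputed
        return cr;
    }

    // blt: the branch just tests a boolean register
    // (backend: TEST reg,reg + Jcc, no cached x86 flag state required).
    static bool bltTaken(const CrBits& cr)
    {
        return cr.lt != 0;
    }

In the same spirit, the atomic_cmp_store operation introduced below can be thought of as a compare_exchange_strong on the reserved address, with the success flag ending up in the cr0 EQ register.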
Introduced atomic_cmp_store operation to allow implementing STWCX in architecture agnostic IML Removed legacy CR-based compare and jump operations --- .../Interpreter/PPCInterpreterALU.hpp | 20 +- .../Interpreter/PPCInterpreterInternal.h | 7 +- .../Interpreter/PPCInterpreterLoadStore.hpp | 3 +- .../Interpreter/PPCInterpreterMain.cpp | 16 +- src/Cafe/HW/Espresso/PPCState.h | 4 +- .../Recompiler/BackendX64/BackendX64.cpp | 452 +++-------- .../Recompiler/BackendX64/BackendX64.h | 7 +- .../Recompiler/BackendX64/BackendX64FPU.cpp | 200 ++--- .../Recompiler/BackendX64/x86Emitter.h | 215 +++++ .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 113 +-- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 105 ++- .../Recompiler/IML/IMLInstruction.cpp | 45 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 152 ++-- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 154 ++-- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 2 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 13 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 2 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 6 - .../Recompiler/PPCRecompilerImlGen.cpp | 732 ++++++++---------- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 71 +- .../Recompiler/PPCRecompilerIntermediate.cpp | 2 +- 21 files changed, 1102 insertions(+), 1219 deletions(-) diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index fe9316f03..769344f81 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow) { if (hasOverflow) { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; + hCPU->xer_so = 1; + hCPU->xer_ov = 1; } else { - hCPU->spr.XER &= ~XER_OV; + hCPU->xer_ov = 0; } } @@ -246,7 +246,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode) uint32 a = hCPU->gpr[rA]; uint32 b = hCPU->gpr[rB]; hCPU->gpr[rD] = ~a + b + 1; - // update xer + // update carry if (ppc_carry_3(~a, b, 1)) hCPU->xer_ca = 1; else @@ -848,8 +848,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -871,8 +870,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -895,8 +893,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -919,8 +916,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h index bc8458d98..bac253c4e 100644 --- 
a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h @@ -50,9 +50,9 @@ #define CR_BIT_EQ 2 #define CR_BIT_SO 3 -#define XER_SO (1<<31) // summary overflow bit -#define XER_OV (1<<30) // overflow bit #define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8 +#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO +#define XER_BIT_OV (30) // FPSCR #define FPSCR_VXSNAN (1<<24) @@ -118,7 +118,8 @@ static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r) { - hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0; + cemu_assert_debug(hCPU->xer_so <= 1); + hCPU->cr[CR_BIT_SO] = hCPU->xer_so; hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0); hCPU->cr[CR_BIT_EQ] = (r == 0); hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0 diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp index 694e05e65..264674584 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp @@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode) ppc_setCRBit(hCPU, CR_BIT_GT, 0); ppc_setCRBit(hCPU, CR_BIT_EQ, 1); } - ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 1 : 0); + cemu_assert_debug(hCPU->xer_so <= 1); + ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so); // remove reservation hCPU->reservedMemAddr = 0; hCPU->reservedMemValue = 0; diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp index ace1601f4..08d6765a3 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp @@ -63,16 +63,24 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue) uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU) { uint32 xerValue = hCPU->spr.XER; - xerValue &= ~(1<xer_ca ) - xerValue |= (1<xer_ca) + xerValue |= (1 << XER_BIT_CA); + if (hCPU->xer_so) + xerValue |= (1 << XER_BIT_SO); + if (hCPU->xer_ov) + xerValue |= (1 << XER_BIT_OV); return xerValue; } void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v) { hCPU->spr.XER = v; - hCPU->xer_ca = (v>>XER_BIT_CA)&1; + hCPU->xer_ca = (v >> XER_BIT_CA) & 1; + hCPU->xer_so = (v >> XER_BIT_SO) & 1; + hCPU->xer_ov = (v >> XER_BIT_OV) & 1; } uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU) diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h index ea7edfa29..8f27ee938 100644 --- a/src/Cafe/HW/Espresso/PPCState.h +++ b/src/Cafe/HW/Espresso/PPCState.h @@ -49,6 +49,8 @@ struct PPCInterpreter_t uint32 fpscr; uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..) 
uint8 xer_ca; // carry from xer + uint8 xer_so; + uint8 xer_ov; uint8 LSQE; uint8 PSE; // thread remaining cycles @@ -67,7 +69,7 @@ struct PPCInterpreter_t uint32 reservedMemValue; // temporary storage for recompiler FPR_t temporaryFPR[8]; - uint32 temporaryGPR[4]; // deprecated, refactor away backend dependency on this + uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away uint32 temporaryGPR_reg[4]; // values below this are not used by Cafe OS usermode struct diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index aa0663109..4fb3aa46d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -28,6 +28,10 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } +static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR64)regId; +} X86Cond _x86Cond(IMLCondition imlCond) { @@ -41,6 +45,10 @@ X86Cond _x86Cond(IMLCondition imlCond) return X86_CONDITION_NBE; case IMLCondition::UNSIGNED_LT: return X86_CONDITION_B; + case IMLCondition::SIGNED_GT: + return X86_CONDITION_NLE; + case IMLCondition::SIGNED_LT: + return X86_CONDITION_L; default: break; } @@ -88,18 +96,6 @@ void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, si } } -void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - sint32 crRegister = imlInstruction->crRegister; - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by TEST - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Set CR SO if XER SO bit is set -} - void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId) { void* prevRSPTemp = hCPU->rspTemp; @@ -424,7 +420,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } else if( imlInstruction->op_storeLoad.copyWidth == 8 ) { - // todo: Optimize by using only MOVZX/MOVSX if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( signExtend ) @@ -434,22 +429,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER ) - { - if( imlInstruction->op_storeLoad.immS32 != 0 ) - assert_dbg(); // not supported - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, 
reservedMemAddr), realRegisterMem); // remember EA for reservation - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if( switchEndian ) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation - // LWARX instruction costs extra cycles (this speeds up busy loops) - x64Gen_sub_mem32reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); - } else return false; return true; @@ -529,106 +508,62 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { - if (imlInstruction->op_storeLoad.immS32 != 0) - assert_dbg(); // todo - // reset cr0 LT, GT and EQ - sint32 crRegister = 0; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); - // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (swapEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // realRegisterMem now holds EA - x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); - sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - // EA matches reservation - // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); - // backup REG_RESV_MEMBASE - x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE); - // add mem register to REG_RESV_MEMBASE - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem); - // load reserved value in EAX - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); - // bswap EAX - x64Gen_bswap_reg64Lower32bit(x64GenContext, X86_REG_EAX); - - x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); - - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); - - // reset reservation - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0); - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0); - - // restore EAX - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, 
REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - // restore REG_RESV_MEMBASE - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2])); - - // copy XER SO to CR0 SO - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->emitter->GetWriteIndex()); - } else return false; return true; } +bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut)); + auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA); + auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue); + auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue); + + // make sure non of the regs are in EAX + if (regEA == X86_REG_EAX || + regBoolOut == X86_REG_EAX || + regVal == X86_REG_EAX || + regCmp == X86_REG_EAX) + { + printf("x86: atomic_cmp_store cannot emit due to EAX already being in use\n"); + return false; + } + + x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); + x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp); + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regBoolOut), _reg32_from_reg8(regBoolOut)); // zero bytes unaffected by SETcc + x64GenContext->emitter->LockPrefix(); + x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut); + x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) { // registerResult = registerA - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - if(imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) - { - // since MOV doesn't set eflags we need another test instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - assert_dbg(); - } - } - else - { + if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) { if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) - assert_dbg(); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // if movbe is available we can move and swap in a single instruction? 
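Note on atomic_cmp_store: the new PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store handler above lowers the architecture-agnostic atomic_cmp_store IML op to LOCK CMPXCHG: EAX receives the compare value, the exchange targets membase+EA, and SETZ records whether the store actually happened. A plain-C++ model of the same semantics (hypothetical helper, not the emitter API):

    #include <atomic>
    #include <cstdint>

    // Returns true and updates the word only if it still holds compareValue
    // (the value remembered by the earlier LWARX); otherwise returns false.
    bool atomicCmpStore(std::atomic<uint32_t>& word, uint32_t compareValue, uint32_t newValue)
    {
        // compare_exchange_strong compiles to LOCK CMPXCHG on x86-64,
        // matching the sequence emitted above.
        return word.compare_exchange_strong(compareValue, newValue);
    }

Because CMPXCHG hard-codes EAX as its implicit operand, the handler simply returns false (setting codeGenerationFailed) when any of its operand registers is already allocated to EAX instead of shuffling registers itself.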
x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); } else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= registerA @@ -647,7 +582,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); @@ -655,7 +589,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if (imlInstruction->operation == PPCREC_IML_OP_NEG) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); @@ -663,9 +596,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // count leading zeros - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) { @@ -686,47 +617,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerA CMP registerB (arithmetic compare) - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - return false; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - return false; - } - // create compare instruction - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( 
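Note on CNTLZW: the backend prefers LZCNT when the CPU advertises it (the ABM bit in CPUID.80000001H:ECX) and otherwise falls back to BSR plus a fix-up, since BSR leaves its destination undefined for a zero input. The end result is plain count-leading-zeros; a portable sketch of the same semantics:

    #include <bit>
    #include <cstdint>

    // Equivalent of the cntlzw result computed above (sketch, not the emitter code).
    // std::countl_zero returns 32 for v == 0, matching PPC cntlzw.
    uint32_t cntlzw(uint32_t v)
    {
        return static_cast<uint32_t>(std::countl_zero(v));
    }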
(imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else - assert_dbg(); - } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); @@ -758,98 +650,50 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - // registerResult = immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { - // registerResult &= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_OR ) { - // registerResult |= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { - // registerResult ^= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult <<<= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( (imlInstruction->op_r_immS32.immS32&0x80) ) assert_dbg(); // should not happen x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || 
imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerResult CMP immS32 (arithmetic compare) - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): No-Op CMP found\n"); - return true; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported CMP with crRegister = 8\n"); - return false; - } - // create compare instruction - x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32); - // set cr bits - uint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else - assert_dbg(); - // todo: Also set summary overflow if xer bit is set? 
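Note on the removed compare handling: the setcc-to-memory sequences deleted here wrote each condition register bit straight into the cr[] byte array. After this patch a compare produces ordinary boolean values instead, matching what the interpreter-side CMP/CMPL changes earlier in this commit compute. A compact model of those CR field semantics (struct and function names are illustrative only):

    #include <cstdint>

    struct CrField { uint8_t lt, gt, eq, so; };

    CrField compareSigned(int32_t a, int32_t b, uint8_t xer_so)
    {
        CrField cr{};
        cr.lt = a < b;
        cr.gt = a > b;
        cr.eq = a == b;
        cr.so = xer_so; // SO is copied from XER, never computed by the compare
        return cr;
    }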
- } else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { - uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - for(sint32 f=0; f<32; f++) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - } + __debugbreak(); + //uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; + //x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); + //for(sint32 f=0; f<32; f++) + //{ + // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); + // x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); + //} } else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) { - uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; - uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - for (sint32 f = 0; f < 32; f++) - { - if(((crBitMask >> f) & 1) == 0) - continue; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); - x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); - } + __debugbreak(); + //uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; + //uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + //for (sint32 f = 0; f < 32; f++) + //{ + // if(((crBitMask >> f) & 1) == 0) + // continue; + // x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); + // x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); + // x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); + //} } else { @@ -861,30 +705,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - { - // registerResult = immS32 (conditional) - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } - - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); - uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - if (imlInstruction->op_conditional_r_s32.bitMustBeSet) - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - else - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } + cemu_assert_unimplemented(); + //if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + //{ + // // registerResult = immS32 (conditional) + // if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) + // { + // assert_dbg(); + // } + + // x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 
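Note on MFCR/MTCRF: both cases are stubbed out with __debugbreak for now; the disabled MFCR loop gathered the 32 cr[] bytes into a 32-bit mask using a BT/ADC pair per bit. The same gather in plain C++, for reference (hypothetical helper name):

    #include <cstdint>

    // cr[0] (CR0.LT) ends up in bit 31 of the result, cr[31] in bit 0,
    // exactly like the bt/adc loop in the commented-out code above.
    uint32_t packCR(const uint8_t cr[32])
    {
        uint32_t value = 0;
        for (int i = 0; i < 32; i++)
            value = (value << 1) | (cr[i] & 1);
        return value;
    }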
(uint32)imlInstruction->op_conditional_r_s32.immS32); + // uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; + // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); + // if (imlInstruction->op_conditional_r_s32.bitMustBeSet) + // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); + // else + // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); + // return true; + //} return false; } bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if (imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 @@ -908,7 +751,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { - // registerResult = registerOperand1 - registerOperand2 sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -940,7 +782,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegA = imlInstruction->op_r_r_r.registerA; sint32 rRegB = imlInstruction->op_r_r_r.registerB; @@ -1140,7 +981,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1175,7 +1015,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1310,15 +1149,12 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 regResult = imlInstruction->op_r_r_s32.registerResult; sint32 regOperand = imlInstruction->op_r_r_s32.registerA; uint32 immS32 = imlInstruction->op_r_r_s32.immS32; if( imlInstruction->operation == 
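Note on MULTIPLY_HIGH: the MULTIPLY_HIGH_SIGNED/UNSIGNED paths compute the upper 32 bits of the full 64-bit product. Stripped of the register shuffling, the operation is simply:

    #include <cstdint>

    // mulhw / mulhwu semantics (sketch):
    int32_t  mulhw (int32_t a,  int32_t b)  { return (int32_t)(((int64_t)a * (int64_t)b) >> 32); }
    uint32_t mulhwu(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * (uint64_t)b) >> 32); }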
PPCREC_IML_OP_ADD ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; @@ -1328,7 +1164,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction } else if (imlInstruction->operation == PPCREC_IML_OP_SUB) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); @@ -1337,7 +1172,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_XOR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); if (imlInstruction->operation == PPCREC_IML_OP_AND) @@ -1355,8 +1189,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction uint32 me = (vImm>>8)&0xFF; uint32 sh = (vImm>>16)&0xFF; uint32 mask = ppc_mask(mb, me); - // save cr - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy rS to temporary register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA); // rotate destination register @@ -1434,50 +1266,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFu return true; } -bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) -{ - if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) - { - // jump always - cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmp_imm32(x64GenContext, 0); - } - else - { - cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - // generate jump update marker - if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) - { - // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - sint32 condition = imlInstruction->op_conditionalJump.condition; - if( condition == PPCREC_JUMP_CONDITION_E ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - else if( condition == PPCREC_JUMP_CONDITION_NE ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - else - assert_dbg(); - } - else - { - uint8 crBitIndex = imlInstruction->op_conditionalJump.crRegisterIndex*4 + imlInstruction->op_conditionalJump.crBitIndex; - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - cemu_assert_debug(imlSegment->GetBranchTaken()); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken()); - if( imlInstruction->op_conditionalJump.bitMustBeSet ) - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); - } - else - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); - } - } - } - return true; -} - bool 
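Note on the rotate-and-insert block above (rlwimi-style): the source value is rotated left by sh and merged into the destination under the mask built by ppc_mask(mb, me). A concise statement of that semantics, assuming sh is already reduced to the 0..31 range (helper name is illustrative):

    #include <cstdint>

    uint32_t rotateAndInsert(uint32_t rA, uint32_t rS, uint32_t sh, uint32_t mask)
    {
        uint32_t rotated = (rS << sh) | (rS >> ((32 - sh) & 31)); // rotl32(rS, sh)
        return (rotated & mask) | (rA & ~mask);                   // insert under mask
    }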
PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { // some tests (all performed on a i7-4790K) @@ -1492,49 +1280,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction return true; } -/* -* PPC condition register operation -*/ -bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR) - { - // clear cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 0); - return true; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - // set cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 1); - return true; - } - else if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC ) - { - x64Emit_movZX_reg64_mem8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crB); - if (imlInstruction->operation == PPCREC_IML_OP_CR_ORC || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - return false; // untested - x64Gen_int3(x64GenContext); - x64Gen_xor_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); // complement - } - if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC) - x64Gen_or_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - else - x64Gen_and_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - - x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD); - - return true; - } - else - { - assert_dbg(); - } - return false; -} - void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; @@ -1567,6 +1312,22 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, { x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } + else if (name == PPCREC_NAME_XER_SO) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, 
offsetof(PPCInterpreter_t, reservedMemValue)); + } else assert_dbg(); } @@ -1603,6 +1364,22 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, { x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); } + else if (name == PPCREC_NAME_XER_SO) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.registerIndex); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.registerIndex); + } else assert_dbg(); } @@ -1713,13 +1490,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) - { - if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false ) - { - codeGenerationFailed = true; - } - } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) { PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); @@ -1759,12 +1529,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) + else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - if( PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { + if (!PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction)) codeGenerationFailed = true; - } } else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) { @@ -1822,6 +1590,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE) + { + PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } else { debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 1683c5b99..066078cbb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -62,11 +62,6 @@ enum X86_CONDITION_NONE, // no condition, jump always }; -#define PPCREC_CR_TEMPORARY (8) // never stored -#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI) -#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, 
CMPI) -#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) - #define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register @@ -86,6 +81,8 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); + // ASM gen void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v); void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 14d05d5af..473124879 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -5,6 +5,31 @@ #include "asm/x64util.h" // for recompiler_fres / frsqrte +uint32 _regF64(IMLReg r) +{ + return (uint32)r; +} + +static x86Assembler64::GPR32 _reg32(sint8 physRegId) +{ + return (x86Assembler64::GPR32)physRegId; +} + +static x86Assembler64::GPR8_REX _reg8(sint8 physRegId) +{ + return (x86Assembler64::GPR8_REX)physRegId; +} + +static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) +{ + return (x86Assembler64::GPR32)regId; +} + +static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; @@ -690,18 +715,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction { if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } // VPUNPCKHQDQ if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) { @@ -725,170 +742,73 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( 
imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 2); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // use unpckhpd here? x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 3); _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - if( 
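Note on the paired-single copy ops: each FPR is treated as one XMM register holding two doubles, with the "bottom" value in the low lane and the "top" value in the high lane, so MOVDDUP, UNPCKLPD and SHUFPD are just lane shuffles. Two of the copy operations expressed with intrinsics (sketch only):

    #include <immintrin.h>

    // COPY_BOTTOM_TO_BOTTOM_AND_TOP: duplicate the low double into both lanes.
    __m128d copyBottomToBottomAndTop(__m128d src)
    {
        return _mm_movedup_pd(src); // MOVDDUP
    }

    // COPY_BOTTOM_TO_TOP: keep dst's low lane, move src's low lane into the high lane.
    __m128d copyBottomToTop(__m128d dst, __m128d src)
    {
        return _mm_unpacklo_pd(dst, src); // UNPCKLPD
    }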
imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.registerOperand); x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); // move to FPR register x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_TEMP); } - else if(imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM ) - { - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM) - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP) - { - // temporarily switch top/bottom of both operands and compare - if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - } - else - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - } - } - else - x64Gen_comisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - // todo: handle FPSCR updates - // update cr - sint32 crRegister = imlInstruction->crRegister; - // if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, 
x64GenContext->emitter->GetWriteIndex()); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } // move register to XMM15 x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); @@ -901,7 +821,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // move register to XMM15 x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); @@ -914,7 +833,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) { @@ -925,7 +843,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) { @@ -936,7 +853,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // calculate bottom half of result x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) @@ -968,10 +884,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti { if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) { x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -988,8 +900,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM) { - // registerResult(fp0) = registerOperandA(fp0) + registerOperandB(fp0) - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // todo: Use AVX 3-operand VADDSD if available if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) { @@ -1008,7 +918,6 @@ void 
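Note on NEGATE/ABS: negation and absolute value on FPR values need no arithmetic; the emitted code flips or clears the IEEE-754 sign bit with a constant XMM mask kept in the recompiler instance data (the _x64XMM_* tables). The same trick with intrinsics, mask written out explicitly (sketch only):

    #include <immintrin.h>
    #include <cstdint>

    static const __m128d kSignMask = _mm_castsi128_pd(_mm_set1_epi64x(INT64_MIN)); // 0x8000000000000000 in both lanes

    __m128d negatePair(__m128d v) { return _mm_xor_pd(v, kSignMask); }     // flip sign of both doubles
    __m128d absPair(__m128d v)    { return _mm_andnot_pd(kSignMask, v); }  // clear sign of both doubles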
PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) { // registerResult = registerOperandA - registerOperandB - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) { x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -1031,7 +940,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) { x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -1059,8 +967,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc { if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // todo: Investigate if there are other optimizations possible if the operand registers overlap // generic case // 1) move frA bottom to frTemp bottom and top @@ -1074,7 +980,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // todo: Investigate if there are other optimizations possible if the operand registers overlap // 1) move frA bottom to frTemp bottom and top x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); @@ -1094,7 +999,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); @@ -1110,7 +1014,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // select bottom x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); @@ -1145,32 +1048,22 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc assert_dbg(); } -/* - * Single FPR operation - */ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // toggle sign bit 
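Note on SELECT_BOTTOM / SELECT_PAIR: these implement fsel-style selection, comparing operand A against 0.0 and copying either operand B or operand C into the result; an unordered compare (NaN in A) takes the "below" branch. Scalar model of one lane (helper name is illustrative):

    // fsel-style select: pick c when a >= 0.0, otherwise (including NaN) pick b.
    double selectScalar(double a, double b, double c)
    {
        return (a >= 0.0) ? c : b; // any comparison with NaN is false, so NaN falls through to b
    }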
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // mask out sign bit x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // set sign bit x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert to 32bit single x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // convert back to 64bit double @@ -1178,7 +1071,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert to 32bit singles x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // convert back to 64bit doubles @@ -1186,7 +1078,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert bottom to 64bit double x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // copy to top half @@ -1197,3 +1088,44 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, cemu_assert_unimplemented(); } } + +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg8(imlInstruction->op_fpr_compare.regR); + auto regA = _regF64(imlInstruction->op_fpr_compare.regA); + auto regB = _regF64(imlInstruction->op_fpr_compare.regB); + + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); + x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB); + + if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT) + { + // GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both) + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR); + return; + } + else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U) + { + // unordered case can be checked via PF + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR); + return; + } + + // remember unordered state + auto regTmp = _reg32_from_reg8(_reg32(REG_RESV_TEMP)); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions + + X86Cond x86Cond; + switch 
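Note on the new fpr_compare handler: it turns a UCOMISD result directly into a 0/1 register. Unordered operands set ZF, PF and CF simultaneously, so GT can be read with a single SETNBE, the unordered outcome with a parity check, and for LT/EQ the inverted parity bit is ANDed in to force the result to 0 when either operand is NaN. A scalar model of the four conditions it can produce:

    #include <cmath>

    enum class FpCmpResult { LT, GT, EQ, UNORDERED };

    // Exactly one outcome is true for any pair of doubles; a NaN operand
    // always yields UNORDERED, mirroring the PF handling above.
    FpCmpResult fpCompare(double a, double b)
    {
        if (std::isnan(a) || std::isnan(b)) return FpCmpResult::UNORDERED;
        if (a < b) return FpCmpResult::LT;
        if (a > b) return FpCmpResult::GT;
        return FpCmpResult::EQ;
    }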
(imlInstruction->op_fpr_compare.cond) + { + case IMLCondition::UNORDERED_LT: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR); + break; + case IMLCondition::UNORDERED_EQ: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR); + break; + default: + cemu_assert_unimplemented(); + } + x64GenContext->emitter->AND_bb(_reg8_from_reg32(regR), _reg8_from_reg32(regTmp)); // if unordered (PF=1) then force LT/GT/EQ to zero +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h index 6b05a5146..eae3835db 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h @@ -84,6 +84,7 @@ class x86Assembler64 using GPR64 = X86Reg; using GPR32 = X86Reg; using GPR8_REX = X86Reg; + void LockPrefix() { _emitU8(0xF0); }; void ADD_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) @@ -3194,6 +3195,124 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void XCHG_bb(GPR8_REX dst, GPR8_REX src) + { + if ((dst >= 4) || (src >= 4)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x86); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x86); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_dd(GPR32 dst, GPR32 src) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } void MOV_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) @@ -4032,6 +4151,102 @@ class x86Assembler64 if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void CMPXCHG_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void BSWAP_d(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } + void BSWAP_q(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } void BT_du8(GPR32 dst, u8 imm) { if (((dst & 8) != 0)) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index cd40de7f4..d24fec87d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -18,7 +18,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) // loops using BDNZ are assumed to always be finite for(const IMLInstruction& instIt : imlSegment->imlList) { - if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB && instIt.crRegister == 8) + if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB) { return true; } @@ -92,59 +92,60 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking) { - crTracking->readCRBits = 0; - crTracking->writtenCRBits = 0; - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - crTracking->readCRBits = (crBitFlag); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - crTracking->readCRBits = crBitFlag; - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) - { - crTracking->readCRBits = 0xFFFFFFFF; - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || - imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || - imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || - imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crA); - crTracking->readCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crB); - crTracking->readCRBits |= crBitFlag; - } - else - assert_dbg(); - } - else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) - { - crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); - } - else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == 
PPC_REC_STORE_STWCX_MARKER) - { - // overwrites CR0 - crTracking->writtenCRBits |= (0xF << 0); - } + __debugbreak(); + //crTracking->readCRBits = 0; + //crTracking->writtenCRBits = 0; + //if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) + //{ + // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); + // crTracking->readCRBits = (crBitFlag); + // } + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + //{ + // uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); + // crTracking->readCRBits = crBitFlag; + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) + //{ + // crTracking->readCRBits = 0xFFFFFFFF; + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) + //{ + // crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_CR) + //{ + // if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || + // imlInstruction->operation == PPCREC_IML_OP_CR_SET) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + // crTracking->writtenCRBits = crBitFlag; + // } + // else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || + // imlInstruction->operation == PPCREC_IML_OP_CR_ORC || + // imlInstruction->operation == PPCREC_IML_OP_CR_AND || + // imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + // crTracking->writtenCRBits = crBitFlag; + // crBitFlag = 1 << (imlInstruction->op_cr.crA); + // crTracking->readCRBits = crBitFlag; + // crBitFlag = 1 << (imlInstruction->op_cr.crB); + // crTracking->readCRBits |= crBitFlag; + // } + // else + // assert_dbg(); + //} + //else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) + //{ + // crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); + //} + //else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) + //{ + // // overwrites CR0 + // crTracking->writtenCRBits |= (0xF << 0); + //} } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 4dafaf18b..2fbf2b6f5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -206,6 +206,18 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool { strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); } + else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) + strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); + else if (inst.op_r_name.name == PPCREC_NAME_XER_CA) + strOutput.add("xer.ca"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) + strOutput.add("xer.so"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) + strOutput.add("xer.ov"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) + strOutput.add("cpuReservation.ea"); + else if (inst.op_r_name.name == 
PPCREC_NAME_CPU_MEMRES_VAL) + strOutput.add("cpuReservation.value"); else strOutput.add("ukn"); strOutput.add(")"); @@ -217,11 +229,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerA, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_R) { @@ -231,10 +238,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerA); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerB, true); - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) { @@ -274,9 +277,13 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true); - if(!inst.op_conditionalJump2.mustBeTrue) + if (!inst.op_conditionalJump2.mustBeTrue) strOutput.add("(inverted)"); } + else if (inst.type == PPCREC_IML_TYPE_JUMP) + { + strOutput.add("JUMP"); + } else if (inst.type == PPCREC_IML_TYPE_R_R_S32) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); @@ -286,11 +293,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerA); IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) { @@ -311,55 +313,42 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.registerIndex); IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - { - if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) - strOutput.add("LD_"); - else - strOutput.add("ST_"); + { + if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) + strOutput.add("LD_"); + else + strOutput.add("ST_"); - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); + + if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); + else + 
strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); + } + else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + strOutput.add("ATOMIC_ST_U32"); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); - - if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); - else - strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - } - else if (inst.type == PPCREC_IML_TYPE_CJUMP) - { - if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) - strOutput.add("JE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) - strOutput.add("JNE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) - strOutput.add("JG"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) - strOutput.add("JGE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) - strOutput.add("JL"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) - strOutput.add("JLE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE) - strOutput.add("JALW"); // jump always - else - cemu_assert_unimplemented(); - strOutput.addFmt(" (cr{})", inst.crRegister); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); } else if (inst.type == PPCREC_IML_TYPE_NO_OP) { @@ -487,10 +476,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); strOutput.add(" (conditional)"); - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> and update CR{}", inst.crRegister); - } } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 52e19e8c5..b7e2294cc 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -154,7 +154,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // carry is always written registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry; } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // no effect on registers } @@ -222,9 +222,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) registersUsed->readNamedReg3 = op_storeLoad.registerMem2; } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + registersUsed->readNamedReg1 = op_atomic_compare_store.regEA; + registersUsed->readNamedReg2 = op_atomic_compare_store.regCompareValue; + registersUsed->readNamedReg3 = op_atomic_compare_store.regWriteValue; + registersUsed->writtenNamedReg1 = op_atomic_compare_store.regBoolOut; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -467,6 +470,12 @@ void 
IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else cemu_assert_unimplemented(); } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + registersUsed->writtenNamedReg1 = op_fpr_compare.regR; + registersUsed->readFPR1 = op_fpr_compare.regA; + registersUsed->readFPR2 = op_fpr_compare.regB; + } else { cemu_assert_unimplemented(); @@ -560,7 +569,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl { op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable); } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) { // no effect on registers } @@ -613,9 +622,12 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + op_atomic_compare_store.regEA = replaceRegisterMultiple(op_atomic_compare_store.regEA, translationTable); + op_atomic_compare_store.regCompareValue = replaceRegisterMultiple(op_atomic_compare_store.regCompareValue, translationTable); + op_atomic_compare_store.regWriteValue = replaceRegisterMultiple(op_atomic_compare_store.regWriteValue, translationTable); + op_atomic_compare_store.regBoolOut = replaceRegisterMultiple(op_atomic_compare_store.regBoolOut, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -689,6 +701,10 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl else if (type == PPCREC_IML_TYPE_FPR_R) { } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + op_fpr_compare.regR = replaceRegisterMultiple(op_fpr_compare.regR, translationTable); + } else { cemu_assert_unimplemented(); @@ -725,7 +741,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // not affected } @@ -753,9 +769,9 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + ; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -803,6 +819,11 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { op_fpr_r.registerResult = replaceRegisterMultiple(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + op_fpr_compare.regA = replaceRegisterMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_compare.regB = replaceRegisterMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced); + } else { cemu_assert_unimplemented(); @@ -839,7 +860,7 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // not affected } @@ -867,9 +888,9 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == 
PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + ; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 9491136e3..08955b39c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -29,13 +29,6 @@ enum PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 PPCREC_IML_OP_MFCR, // copy cr to gpr PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) - // condition register - PPCREC_IML_OP_CR_CLEAR, // clear cr bit - PPCREC_IML_OP_CR_SET, // set cr bit - PPCREC_IML_OP_CR_OR, // OR cr bits - PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first - PPCREC_IML_OP_CR_AND, // AND cr bits - PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first // FPU PPCREC_IML_OP_FPR_ADD_BOTTOM, PPCREC_IML_OP_FPR_ADD_PAIR, @@ -54,9 +47,9 @@ enum PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half - PPCREC_IML_OP_FPR_FCMPO_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_TOP, + PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated + PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated + PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated PPCREC_IML_OP_FPR_NEGATE_BOTTOM, PPCREC_IML_OP_FPR_NEGATE_PAIR, PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) @@ -111,21 +104,6 @@ enum PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak }; -enum // deprecated condition codes -{ - PPCREC_JUMP_CONDITION_NONE, - PPCREC_JUMP_CONDITION_E, // equal / zero - PPCREC_JUMP_CONDITION_NE, // not equal / not zero - PPCREC_JUMP_CONDITION_LE, // less or equal - PPCREC_JUMP_CONDITION_L, // less - PPCREC_JUMP_CONDITION_GE, // greater or equal - PPCREC_JUMP_CONDITION_G, // greater - // special case: - PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling - PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow - -}; - enum class IMLCondition : uint8 { EQ, @@ -137,14 +115,17 @@ enum class IMLCondition : uint8 SIGNED_OVERFLOW, SIGNED_NOVERFLOW, -}; -enum -{ - PPCREC_CR_MODE_COMPARE_SIGNED, - PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare + // floating point conditions + UNORDERED_GT, // a > b, false if either is NaN + UNORDERED_LT, // a < b, false if either is NaN + UNORDERED_EQ, // a == b, false if either is NaN + UNORDERED_U, // unordered (true if either operand is NaN) - PPCREC_CR_MODE_LOGICAL, + ORDERED_GT, + ORDERED_LT, + ORDERED_EQ, + ORDERED_U }; enum @@ -164,18 +145,20 @@ enum PPCREC_IML_TYPE_NAME_R, // name* = r* PPCREC_IML_TYPE_R_S32, // r* (op) imm PPCREC_IML_TYPE_MACRO, - PPCREC_IML_TYPE_CJUMP, // conditional jump PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 - PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) - // new style of handling conditions and branches: + // conditions and branches PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm - PPCREC_IML_TYPE_JUMP, // replaces CJUMP. Jump always, no condition - PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. 
Jump condition is based on boolean register + PPCREC_IML_TYPE_JUMP, // jump always + PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register + + // atomic + PPCREC_IML_TYPE_ATOMIC_CMP_STORE, - // conditional + // conditional (legacy) PPCREC_IML_TYPE_CONDITIONAL_R_S32, + // FPR PPCREC_IML_TYPE_FPR_R_NAME, // name = f* PPCREC_IML_TYPE_FPR_NAME_R, // f* = name @@ -187,6 +170,8 @@ enum PPCREC_IML_TYPE_FPR_R_R_R, PPCREC_IML_TYPE_FPR_R_R_R_R, PPCREC_IML_TYPE_FPR_R, + + PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r* }; enum @@ -197,15 +182,18 @@ enum PPCREC_NAME_SPR0 = 3000, PPCREC_NAME_FPR0 = 4000, PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 - PPCREC_NAME_XER_CA = 6000, // carry bit + PPCREC_NAME_XER_CA = 6000, // carry bit from XER + PPCREC_NAME_XER_OV = 6001, // overflow bit from XER + PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER + PPCREC_NAME_CR = 7000, // CR register bits (31 to 0) + PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31, + PPCREC_NAME_CPU_MEMRES_EA = 8000, + PPCREC_NAME_CPU_MEMRES_VAL = 8001 }; -// special cases for LOAD/STORE -#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) -#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) - -#define PPC_REC_INVALID_REGISTER 0xFF +#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead +// deprecated, use Espresso namespace #define PPCREC_CR_BIT_LT 0 #define PPCREC_CR_BIT_GT 1 #define PPCREC_CR_BIT_EQ 2 @@ -337,13 +325,12 @@ struct IMLUsedRegisters using IMLReg = uint8; +inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1; + struct IMLInstruction { uint8 type; uint8 operation; - uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr. - uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior - uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated union { struct @@ -352,13 +339,11 @@ struct IMLInstruction }padding; struct { - // R (op) A [update cr* in mode *] uint8 registerResult; uint8 registerA; }op_r_r; struct { - // R = A (op) B [update cr* in mode *] uint8 registerResult; uint8 registerA; uint8 registerB; @@ -385,13 +370,11 @@ struct IMLInstruction }op_r_r_s32_carry; struct { - // R/F = NAME or NAME = R/F uint8 registerIndex; uint32 name; - }op_r_name; + }op_r_name; // alias op_name_r struct { - // R (op) s32 [update cr* in mode *] uint8 registerIndex; sint32 immS32; }op_r_immS32; @@ -402,13 +385,6 @@ struct IMLInstruction uint16 paramU16; }op_macro; struct - { - uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? 
-> Cleanup) - uint8 crRegisterIndex; - uint8 crBitIndex; - bool bitMustBeSet; - }op_conditionalJump; // legacy jump - struct { uint8 registerData; uint8 registerMem; @@ -450,6 +426,13 @@ struct IMLInstruction uint8 registerResult; }op_fpr_r; struct + { + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; + IMLReg regB; + IMLCondition cond; + }op_fpr_compare; + struct { uint8 crD; // crBitIndex (result) uint8 crA; // crBitIndex @@ -474,6 +457,13 @@ struct IMLInstruction uint8 registerBool; bool mustBeTrue; }op_conditionalJump2; + struct + { + IMLReg regEA; + IMLReg regCompareValue; + IMLReg regWriteValue; + IMLReg regBoolOut; // boolean 0/1 + }op_atomic_compare_store; // conditional operations (emitted if supported by target platform) struct { @@ -495,7 +485,6 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || - type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) @@ -508,8 +497,6 @@ struct IMLInstruction { type = PPCREC_IML_TYPE_NO_OP; operation = 0; - crRegister = PPC_REC_INVALID_REGISTER; - crMode = 0; } void make_debugbreak(uint32 currentPPCAddress = 0) @@ -530,7 +517,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; } @@ -539,8 +525,6 @@ struct IMLInstruction // operation with two register operands (e.g. "t0 = t1") this->type = PPCREC_IML_TYPE_R_R; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r.registerResult = registerResult; this->op_r_r.registerA = registerA; } @@ -550,8 +534,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_S32; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_immS32.registerIndex = registerIndex; this->op_r_immS32.immS32 = immS32; } @@ -561,8 +543,6 @@ struct IMLInstruction // operation with three register operands (e.g. "t0 = t1 + t4") this->type = PPCREC_IML_TYPE_R_R_R; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r_r.registerResult = registerResult; this->op_r_r_r.registerA = registerA; this->op_r_r_r.registerB = registerB; @@ -572,8 +552,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_R_CARRY; this->operation = operation; - this->crRegister = 0xFF; - this->crMode = 0xFF; this->op_r_r_r_carry.regR = registerResult; this->op_r_r_r_carry.regA = registerA; this->op_r_r_r_carry.regB = registerB; @@ -585,8 +563,6 @@ struct IMLInstruction // operation with two register operands and one signed immediate (e.g. 
"t0 = t1 + 1234") this->type = PPCREC_IML_TYPE_R_R_S32; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r_s32.registerResult = registerResult; this->op_r_r_s32.registerA = registerA; this->op_r_r_s32.immS32 = immS32; @@ -596,8 +572,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_S32_CARRY; this->operation = operation; - this->crRegister = 0xFF; - this->crMode = 0xFF; this->op_r_r_s32_carry.regR = registerResult; this->op_r_r_s32_carry.regA = registerA; this->op_r_r_s32_carry.immS32 = immS32; @@ -608,8 +582,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_compare.registerResult = registerResult; this->op_compare.registerOperandA = registerA; this->op_compare.registerOperandB = registerB; @@ -620,8 +592,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE_S32; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_compare_s32.registerResult = registerResult; this->op_compare_s32.registerOperandA = registerA; this->op_compare_s32.immS32 = immS32; @@ -632,8 +602,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_conditionalJump2.registerBool = registerBool; this->op_conditionalJump2.mustBeTrue = mustBeTrue; } @@ -642,8 +610,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_JUMP; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; } // load from memory @@ -651,7 +617,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_LOAD; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; this->op_storeLoad.registerData = registerDestination; this->op_storeLoad.registerMem = registerMemory; this->op_storeLoad.immS32 = immS32; @@ -665,7 +630,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_STORE; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; this->op_storeLoad.registerData = registerSource; this->op_storeLoad.registerMem = registerMemory; this->op_storeLoad.immS32 = immS32; @@ -674,6 +638,26 @@ struct IMLInstruction this->op_storeLoad.flags2.signExtend = false; } + void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput) + { + this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE; + this->operation = 0; + this->op_atomic_compare_store.regEA = regEA; + this->op_atomic_compare_store.regCompareValue = regCompareValue; + this->op_atomic_compare_store.regWriteValue = regWriteValue; + this->op_atomic_compare_store.regBoolOut = regSuccessOutput; + } + + void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_FPR_COMPARE; + this->operation = -999; + this->op_fpr_compare.regR = regR; + this->op_fpr_compare.regA = regA; + this->op_fpr_compare.regB = regB; + this->op_fpr_compare.cond = cond; + } + void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; //void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index f67b49e15..a1569d335 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -511,6 +511,8 @@ uint32 
_PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, I */ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { + __debugbreak(); // deprecated + if (imlSegment->nextSegmentIsUncertain) { return 0; @@ -535,81 +537,83 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IM void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) { - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for(IMLInstruction& instIt : segIt->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_CJUMP) - { - if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); - segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - segIt->crBitsRead |= (crBitFlag); - } - } - else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); - segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - segIt->crBitsRead |= (crBitFlag); - } - else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) - { - segIt->crBitsRead |= 0xFFFFFFFF; - } - else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) - { - segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); - } - else if( instIt.type == PPCREC_IML_TYPE_CR ) - { - if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || - instIt.operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (instIt.op_cr.crD); - segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - } - else if (instIt.operation == PPCREC_IML_OP_CR_OR || - instIt.operation == PPCREC_IML_OP_CR_ORC || - instIt.operation == PPCREC_IML_OP_CR_AND || - instIt.operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (instIt.op_cr.crD); - segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - crBitFlag = 1 << (instIt.op_cr.crA); - segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - crBitFlag = 1 << (instIt.op_cr.crB); - segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - } - else - cemu_assert_unimplemented(); - } - else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - { - segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); - } - else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) - { - // overwrites CR0 - segIt->crBitsWritten |= (0xF<<0); - } - } - } - // flag instructions that write to CR where we can ignore individual CR bits - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for (IMLInstruction& instIt : segIt->imlList) - { - if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - { - uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); - uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); - uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead; - instIt.crIgnoreMask = crIgnoreMask; - } - } - } + __debugbreak(); // deprecated + + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // for(IMLInstruction& instIt : segIt->imlList) + // { + // if (instIt.type == 
PPCREC_IML_TYPE_CJUMP) + // { + // if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // { + // uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); + // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + // segIt->crBitsRead |= (crBitFlag); + // } + // } + // else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + // { + // uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); + // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + // segIt->crBitsRead |= (crBitFlag); + // } + // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) + // { + // segIt->crBitsRead |= 0xFFFFFFFF; + // } + // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) + // { + // segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); + // } + // else if( instIt.type == PPCREC_IML_TYPE_CR ) + // { + // if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || + // instIt.operation == PPCREC_IML_OP_CR_SET) + // { + // uint32 crBitFlag = 1 << (instIt.op_cr.crD); + // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); + // } + // else if (instIt.operation == PPCREC_IML_OP_CR_OR || + // instIt.operation == PPCREC_IML_OP_CR_ORC || + // instIt.operation == PPCREC_IML_OP_CR_AND || + // instIt.operation == PPCREC_IML_OP_CR_ANDC) + // { + // uint32 crBitFlag = 1 << (instIt.op_cr.crD); + // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); + // crBitFlag = 1 << (instIt.op_cr.crA); + // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); + // crBitFlag = 1 << (instIt.op_cr.crB); + // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); + // } + // else + // cemu_assert_unimplemented(); + // } + // else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) + // { + // segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); + // } + // else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) + // { + // // overwrites CR0 + // segIt->crBitsWritten |= (0xF<<0); + // } + // } + //} + //// flag instructions that write to CR where we can ignore individual CR bits + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // for (IMLInstruction& instIt : segIt->imlList) + // { + // if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) + // { + // uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); + // uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); + // uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead; + // instIt.crIgnoreMask = crIgnoreMask; + // } + // } + //} } //bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index add7098ed..8ef0669e4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,7 +1,7 @@ #pragma once #include "IMLInstruction.h" -#define IML_RA_VIRT_REG_COUNT_MAX 40 // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this 
dynamic +#define IML_RA_VIRT_REG_COUNT_MAX (40 + 32) // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic struct IMLSegmentPoint { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index f74cd2259..dd445b2c9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -186,6 +186,14 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + //if (ppcRecFunc->ppcAddress == 0x30DF5F8) + //{ + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + + //if (ppcRecFunc->ppcAddress == 0x11223344) //{ // //debug_printf("----------------------------------------\n"); @@ -302,9 +310,8 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); - // remove redundant name load and store instructions - PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); - PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); + //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); + //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); return true; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 5a4484dac..c80fad8d9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -6,7 +6,7 @@ #define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1)) -#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) +#define PPC_REC_MAX_VIRTUAL_GPR (40 + 32) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) struct ppcRecRange_t { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 8377671a5..d1475ffe4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -23,13 +23,7 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); - void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); - - -// IML instruction generation (new style, can generate new instructions but also overwrite existing ones) - void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); // IML generation - FPU diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 435a5a7e6..2a1f2c716 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -17,8 +17,7 @@ struct PPCBasicBlockInfo uint32 startAddress; uint32 lastAddress; // inclusive bool 
isEnterable{ false }; - //uint32 enterableAddress{}; -> covered by startAddress - bool hasContinuedFlow{ true }; // non-branch path goes to next segment (lastAddress+4), assumed by default + bool hasContinuedFlow{ true }; // non-branch path goes to next segment, assumed by default bool hasBranchTarget{ false }; uint32 branchTarget{}; @@ -52,7 +51,6 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext { IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); memset(&inst, 0x00, sizeof(IMLInstruction)); - inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default return &inst; } @@ -82,7 +80,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte memset(imlInstruction, 0, sizeof(IMLInstruction)); imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32; imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // r_s32 operation imlInstruction->op_conditional_r_s32.registerIndex = registerIndex; imlInstruction->op_conditional_r_s32.immS32 = immS32; @@ -92,48 +89,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet; } - -// jump based on segment branches -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) -{ - // jump - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - imlInstruction->op_conditionalJump.crRegisterIndex = 0; - imlInstruction->op_conditionalJump.crBitIndex = 0; - imlInstruction->op_conditionalJump.bitMustBeSet = false; -} - -void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - // conditional jump - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.condition = jumpCondition; - imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - -void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB) -{ - // multiple variations: - // operation involving only one cr bit (like clear crD bit) - // operation involving three cr bits (like crD = crA or crB) - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CR; - imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->crMode = 0; - imlInstruction->op_cr.crD = crD; - imlInstruction->op_cr.crA = crA; - imlInstruction->op_cr.crB = crB; -} - void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { 
	ppcImlGenContext->emitInst().make_r_memory(registerDestination, registerMemory, immS32, copyWidth, signExtend, switchEndian);
@@ -145,7 +100,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex
 	IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
 	imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED;
 	imlInstruction->operation = 0;
-	imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
 	imlInstruction->op_storeLoad.registerData = registerDestination;
 	imlInstruction->op_storeLoad.registerMem = registerMemory1;
 	imlInstruction->op_storeLoad.registerMem2 = registerMemory2;
@@ -165,7 +119,6 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex
 	IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
 	imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED;
 	imlInstruction->operation = 0;
-	imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
 	imlInstruction->op_storeLoad.registerData = registerDestination;
 	imlInstruction->op_storeLoad.registerMem = registerMemory1;
 	imlInstruction->op_storeLoad.registerMem2 = registerMemory2;
@@ -303,6 +256,13 @@ uint32 PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext_t* ppcImlGen
 	return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + temporaryIndex);
 }
+IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit)
+{
+	cemu_assert_debug(crReg < 8);
+	cemu_assert_debug(crBit < 4);
+	return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + (crReg * 4) + (crBit));
+}
+
 /*
  * Loads a PPC fpr into any of the available IML FPU registers
  * If loadNew is false, it will check first if the fpr is already loaded into any IML register
@@ -408,7 +368,18 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin
 // for handling RC bit of many instructions
 void PPCImlGen_UpdateCR0Logical(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR)
 {
-	ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL);
+	IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_LT);
+	IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT);
+	IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
+	// todo - SO bit?
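	// Note on the model used here and in the compare handlers further down: _GetCRReg maps each CR bit
	// to its own IML register, named PPCREC_NAME_CR + crField*4 + bitIndex (LT=0, GT=1, EQ=2, SO=3), so
	// cr0.eq ends up as name 7002. A record-form (Rc=1) result is then expressed as explicit boolean
	// compares against zero instead of the removed crRegister/crMode side channel. A rough sketch of how
	// a guest sequence lowers under this scheme (IML op names abbreviated, registers illustrative):
	//     cmpwi cr7, r5, 0   ->  compare_s32(r5, 0, cr7.lt, SIGNED_LT)
	//                            compare_s32(r5, 0, cr7.gt, SIGNED_GT)
	//                            compare_s32(r5, 0, cr7.eq, EQ)
	//                            assign(cr7.so, xer.so)
	//     beq cr7, target    ->  conditional_jump(cr7.eq, mustBeTrue=true)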
+
+	ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegLT, IMLCondition::SIGNED_LT);
+	ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegGT, IMLCondition::SIGNED_GT);
+	ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegEQ, IMLCondition::EQ);
+
+	//ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, crBitRegSO, 0); // todo - copy from XER
+
+	//ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL);
 }
 
 void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@@ -494,69 +465,80 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
 bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
 {
-	sint32 rD, rA, rB;
-	PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
-	uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
-	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0);
-	return true;
+	printf("MFCR: Not implemented\n");
+	return false;
+
+	//sint32 rD, rA, rB;
+	//PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+	//uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+	//ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0);
+	//return true;
 }
 
 bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
 {
-	uint32 rS;
-	uint32 crMask;
-	PPC_OPC_TEMPL_XFX(opcode, rS, crMask);
-	uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
-	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask);
-	return true;
-}
+	printf("MTCRF: Not implemented\n");
+	return false;
-void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
-	uint32 cr;
-	int rA, rB;
-	PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
-	cr >>= 2;
-	uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
-	uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
-	ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_SIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_SIGNED);
+	//uint32 rS;
+	//uint32 crMask;
+	//PPC_OPC_TEMPL_XFX(opcode, rS, crMask);
+	//uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
+	//ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask);
+	//return true;
 }
-void PPCRecompilerImlGen_CMPL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned)
 {
 	uint32 cr;
 	int rA, rB;
 	PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
 	cr >>= 2;
-	uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
-	uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
-	ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED);
-}
-void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
-	uint32 cr;
-	int rA;
-	uint32 imm;
-	PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm);
-	cr >>= 2;
-	sint32 b = imm;
-	// load gpr into register
-	uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
-	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED);
+	IMLReg gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+	IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+	IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO);
+
+	IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT);
+	IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT);
+	IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
+	IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO);
+
+	ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT);
+	ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegGT, isUnsigned ? IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT);
+	ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegEQ, IMLCondition::EQ);
+	ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO);
 }
-void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+bool PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned)
 {
 	uint32 cr;
 	int rA;
 	uint32 imm;
-	PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm);
+	if (isUnsigned)
+	{
+		PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm);
+	}
+	else
+	{
+		PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm);
+	}
 	cr >>= 2;
-	uint32 b = imm;
-	// load gpr into register
-	uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
-	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED);
+
+	IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA);
+	IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO);
+
+	IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT);
+	IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT);
+	IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
+	IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO);
+
+	ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT);
+	ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegGT, isUnsigned ? IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT);
+	ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegEQ, IMLCondition::EQ);
+	ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO);
+
+	return true;
 }
 
 bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@@ -575,8 +557,8 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
 		return true;
 	}
 	// is jump destination within recompiled function?
- if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) + ppcImlGenContext->emitInst().make_jump_new(); else ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; @@ -589,6 +571,9 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); + // decodeOp_BC(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK) + Espresso::BOField boField(BO); + uint32 crRegister = BI/4; uint32 crBit = BI%4; uint32 jumpCondition = 0; @@ -597,6 +582,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 bool ignoreCondition = (BO&16)!=0; + IMLReg regCRBit; + if (!ignoreCondition) + regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + uint32 jumpAddressDest = BD; if( (opcode&PPC_OPC_AA) == 0 ) { @@ -605,35 +594,14 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { + if (useDecrementer) + return false; // conditional function calls are not supported if( ignoreCondition == false ) { - // generate jump condition - if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; } @@ -644,8 +612,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { if( ignoreCondition == false ) return false; // not supported for the moment - uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); - uint32 tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); + IMLReg ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); + IMLReg tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1); ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? 
IMLCondition::EQ : IMLCondition::NEQ); ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true); @@ -661,34 +629,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) } else { - // generate jump condition - if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); } else { @@ -713,6 +657,10 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 crRegister = BI/4; uint32 crBit = BI%4; + IMLReg regCRBit; + if (!BO.conditionIgnore()) + regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); if (LK) { @@ -738,39 +686,9 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // no decrementer but CR check cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); - // generate jump condition - uint32 jumpCondition = 0; - if (!BO.conditionInverted()) - { - // CR bit must be set - if (crBit == 0) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - else - { - if (crBit == 0) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - - // write the dynamic branch instruction to a new segment that is set as a branch target for the current segment PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, !BO.conditionInverted()); - - + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, !BO.conditionInverted()); bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); } else @@ -1706,31 +1624,12 @@ bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } -bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = rA != 
0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_LOAD_LWARX_MARKER, false, true); - else - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_LOAD_LWARX_MARKER, false, true); - return true; -} - void PPCRecompilerImlGen_LMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm; + cemu_assert_debug(rA != 0); sint32 index = 0; while( rD <= 31 ) { @@ -1935,22 +1834,6 @@ bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenConte return true; } -bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_STORE_STWCX_MARKER, false, true); - else - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_STORE_STWCX_MARKER, true); - return true; -} - bool PPCRecompilerImlGen_STWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rA, rS, rB; @@ -1972,6 +1855,7 @@ void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm); + cemu_assert_debug(rA != 0); sint32 index = 0; while( rS <= 31 ) { @@ -2063,6 +1947,86 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } +bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rD, rB; + PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + + IMLReg regA = rA != 0 ? 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // calculate EA + if (regA != IMLREG_INVALID) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); + // load word + ppcImlGenContext->emitInst().make_r_memory(regD, regMemResEA, 0, 32, false, true); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResVal, regD); + return true; +} + +bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rS, rB; + PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regData = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg regTmpDataBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + IMLReg regTmpCompareBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + // calculate EA + IMLReg regCalcEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + if (regA != IMLREG_INVALID) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regCalcEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB); + // get CR bit regs and set LT, GT and SO immediately + IMLReg regCrLT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_LT); + IMLReg regCrGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_GT); + IMLReg regCrEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_EQ); + IMLReg regCrSO = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrLT, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrGT, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrSO, regXerSO); + // get regs for reservation address and value + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // compare calculated EA with reservation + IMLReg regTmpBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + ppcImlGenContext->emitInst().make_compare(regCalcEA, regMemResEA, regTmpBool, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_conditional_jump_new(regTmpBool, true); + + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken, EA matching */ + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpDataBE, regData); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpCompareBE, regMemResVal); +
ppcImlGenContext->emitInst().make_atomic_cmp_store(regMemResEA, regTmpCompareBE, regTmpDataBE, regCrEQ); + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken, EA mismatching */ + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrEQ, 0); + } + ); + + // reset reservation + // I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required + // Most sources state that it is cleared on successful store. They don't explicitly mention what happens on failure + // In contrast, "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared + // There may also be differences between individual PPC generations + // In disassembly I have never seen more than one STWCX after each LWARX, which hints at reservation always being cleared or at least the compiler assuming this + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0); + + return true; +} + bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rA, rB; @@ -2339,41 +2303,23 @@ bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg gprDestReg; if( rS == rB ) { // xor register with itself - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // rA = rS ^ rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - else - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - // rA ^= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); - } + IMLReg gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg, gprSource2Reg); } + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); return true; } @@ -2427,15 +2373,9 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - // ORI 
does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2443,15 +2383,9 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - // ORI does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2459,15 +2393,9 @@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORI does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2475,22 +2403,19 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORIS does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_OR, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regCrB); return true; } @@ -2498,7 +2423,12 @@ bool PPCRecompilerImlGen_CRORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ORC, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regTmp); return true; } @@ -2506,7 +2436,10 @@ bool PPCRecompilerImlGen_CRAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_AND, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regCrB); return true; } @@ -2514,7 +2447,12 @@ bool PPCRecompilerImlGen_CRANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ANDC, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regTmp); return true; } @@ -2522,17 +2460,15 @@ bool 
PPCRecompilerImlGen_CRXOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + if (regCrA == regCrB) { - // both operands equal, clear bit in crD - // PPC's assert() uses this to pass a parameter to OSPanic - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_CLEAR, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 0); return true; } - else - { - return false; - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regCrB); return true; } @@ -2540,16 +2476,17 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + if (regCrA == regCrB) { - // both operands equal, set bit in crD - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_SET, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 1); return true; } - else - { - return false; - } + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regTmp); return true; } @@ -2682,15 +2619,18 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) switch (PPC_getBits(opcode, 25, 5)) { case 0: - PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 1: - PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 2: - PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; default: @@ -2843,14 +2783,16 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode); break; case 8: // SUBFIC - if( !PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode) ) + if (!PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode)) unsupportedInstructionFound = true; break; case 10: // CMPLI - PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, true)) + unsupportedInstructionFound = true; break; case 11: // CMPI - PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, false)) + unsupportedInstructionFound = true; break; case 12: // ADDIC if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == 
false) @@ -2964,7 +2906,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) switch (PPC_getBits(opcode, 30, 10)) { case 0: - PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, false); break; case 4: PPCRecompilerImlGen_TW(ppcImlGenContext, opcode); @@ -3009,7 +2951,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 32: - PPCRecompilerImlGen_CMPL(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, true); // CMPL break; case 40: if (PPCRecompilerImlGen_SUBF(ppcImlGenContext, opcode) == false) @@ -3764,15 +3706,7 @@ void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext } // check last instruction of segment IMLInstruction* imlInstruction = segIt->GetLastInstruction(); - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - cemu_assert_debug(segIt->GetBranchTaken()); - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - cemu_assert_debug(segIt->GetBranchNotTaken()); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) + if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) { auto macroType = imlInstruction->operation; switch (macroType) @@ -3854,7 +3788,6 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction PPCRecompiler_pushBackIMLInstructions(seg, 0, 1); seg->imlList[0].type = PPCREC_IML_TYPE_MACRO; - seg->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; seg->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; seg->imlList[0].op_macro.param = ppcInstructionCount; } @@ -3937,20 +3870,13 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction cemu_assert_debug(seg->GetBranchTaken()); cemu_assert_debug(seg->GetBranchNotTaken()); } - if (inst->type == PPCREC_IML_TYPE_CJUMP) + if (inst->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - if (inst->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) { - if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) - { - debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", (int)segIndex); - IMLDebug_Dump(&ppcImlGenContext); - cemu_assert_error(); - } - } - else - { - // proper error checking for branch-always (or branch-never if invert bit is set) + debug_printf("---------------- SegmentDump (Missing branch for conditional jump in segment 0x%x):\n", (int)segIndex); + IMLDebug_Dump(&ppcImlGenContext); + cemu_assert_error(); } } } @@ -3968,90 +3894,90 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction void IMLOptimizer_replaceWithConditionalMov(ppcImlGenContext_t& ppcImlGenContext) { // optimization pass - replace segments with conditional MOVs if possible - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) - continue; // not a branching segment - IMLInstruction* lastInstruction = segIt->GetLastInstruction(); - if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) - continue; - IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; - IMLSegment* finalSegment = segIt->nextSegmentBranchTaken; - if (segIt->nextSegmentBranchTaken != 
segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) - continue; - if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) - continue; - if (conditionalSegment->list_prevSegments.size() != 1) - continue; // the reduced segment must not be the target of any other branch - if (conditionalSegment->isEnterable) - continue; - // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) - bool canReduceSegment = true; - for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) - { - IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - continue; - // todo: Register to register copy - canReduceSegment = false; - break; - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) + // continue; // not a branching segment + // IMLInstruction* lastInstruction = segIt->GetLastInstruction(); + // if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) + // continue; + // IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; + // IMLSegment* finalSegment = segIt->nextSegmentBranchTaken; + // if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) + // continue; + // if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) + // continue; + // if (conditionalSegment->list_prevSegments.size() != 1) + // continue; // the reduced segment must not be the target of any other branch + // if (conditionalSegment->isEnterable) + // continue; + // // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) + // bool canReduceSegment = true; + // for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) + // { + // IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; + // if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + // continue; + // // todo: Register to register copy + // canReduceSegment = false; + // break; + // } - if (canReduceSegment == false) - continue; + // if (canReduceSegment == false) + // continue; - // remove the branch instruction - uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; - uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; - bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; - lastInstruction->make_no_op(); + // // remove the branch instruction + // uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; + // uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; + // bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; + // lastInstruction->make_no_op(); - // append conditional moves based on branch condition - for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) - { - IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, 
imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); - else - assert_dbg(); - } - // update segment links - // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment - IMLSegment_RemoveLink(segIt, conditionalSegment); - IMLSegment_RemoveLink(segIt, finalSegment); - IMLSegment_RemoveLink(conditionalSegment, finalSegment); - IMLSegment_SetLinkBranchNotTaken(segIt, finalSegment); - // remove all instructions from conditional segment - conditionalSegment->imlList.clear(); - - // if possible, merge imlSegment with finalSegment - if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) - { - // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() - IMLSegment_RemoveLink(segIt, finalSegment); - if (finalSegment->nextSegmentBranchNotTaken) - { - IMLSegment* tempSegment = finalSegment->nextSegmentBranchNotTaken; - IMLSegment_RemoveLink(finalSegment, tempSegment); - IMLSegment_SetLinkBranchNotTaken(segIt, tempSegment); - } - if (finalSegment->nextSegmentBranchTaken) - { - IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; - IMLSegment_RemoveLink(finalSegment, tempSegment); - IMLSegment_SetLinkBranchTaken(segIt, tempSegment); - } - // copy IML instructions - cemu_assert_debug(segIt != finalSegment); - for (sint32 f = 0; f < finalSegment->imlList.size(); f++) - { - memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); - } - finalSegment->imlList.clear(); - } + // // append conditional moves based on branch condition + // for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) + // { + // IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; + // if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + // PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); + // else + // assert_dbg(); + // } + // // update segment links + // // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment + // IMLSegment_RemoveLink(segIt, conditionalSegment); + // IMLSegment_RemoveLink(segIt, finalSegment); + // IMLSegment_RemoveLink(conditionalSegment, finalSegment); + // IMLSegment_SetLinkBranchNotTaken(segIt, finalSegment); + // // remove all instructions from conditional segment + // conditionalSegment->imlList.clear(); + + // // if possible, merge imlSegment with finalSegment + // if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) + // { + // // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() + // IMLSegment_RemoveLink(segIt, finalSegment); + // if (finalSegment->nextSegmentBranchNotTaken) + // { + // IMLSegment* tempSegment = finalSegment->nextSegmentBranchNotTaken; + // IMLSegment_RemoveLink(finalSegment, tempSegment); + // IMLSegment_SetLinkBranchNotTaken(segIt, tempSegment); + // } + // if (finalSegment->nextSegmentBranchTaken) + // { + // IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; + // IMLSegment_RemoveLink(finalSegment, tempSegment); + // IMLSegment_SetLinkBranchTaken(segIt, 
tempSegment); + // } + // // copy IML instructions + // cemu_assert_debug(segIt != finalSegment); + // for (sint32 f = 0; f < finalSegment->imlList.size(); f++) + // { + // memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); + // } + // finalSegment->imlList.clear(); + // } - // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) - } + // // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) + //} } bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index 95cfd176d..b8986db41 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -1,14 +1,16 @@ +#include "Cafe/HW/Espresso/EspressoISA.h" #include "../Interpreter/PPCInterpreterInternal.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" +IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); + void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory; @@ -23,7 +25,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenCo // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; @@ -39,7 +40,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerSource; imlInstruction->op_storeLoad.registerMem = registerMemory; @@ -54,7 +54,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenCo // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerSource; imlInstruction->op_storeLoad.registerMem = registerMemory1; @@ -73,7 +72,6 @@ void 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcI imlInstruction->operation = operation; imlInstruction->op_fpr_r_r.registerResult = registerResult; imlInstruction->op_fpr_r_r.registerOperand = registerOperand; - imlInstruction->crRegister = crRegister; imlInstruction->op_fpr_r_r.flags = 0; } @@ -86,7 +84,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* pp imlInstruction->op_fpr_r_r_r.registerResult = registerResult; imlInstruction->op_fpr_r_r_r.registerOperandA = registerOperand1; imlInstruction->op_fpr_r_r_r.registerOperandB = registerOperand2; - imlInstruction->crRegister = crRegister; imlInstruction->op_fpr_r_r_r.flags = 0; } @@ -100,7 +97,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* imlInstruction->op_fpr_r_r_r_r.registerOperandA = registerOperandA; imlInstruction->op_fpr_r_r_r_r.registerOperandB = registerOperandB; imlInstruction->op_fpr_r_r_r_r.registerOperandC = registerOperandC; - imlInstruction->crRegister = crRegister; imlInstruction->op_fpr_r_r_r_r.flags = 0; } @@ -112,7 +108,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcIml imlInstruction->type = PPCREC_IML_TYPE_FPR_R; imlInstruction->operation = operation; imlInstruction->op_fpr_r.registerResult = registerResult; - imlInstruction->crRegister = crRegister; } /* @@ -916,12 +911,33 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 crfD, frA, frB; - PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - crfD >>= 2; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); + printf("FCMPO: Not implemented\n"); + return false; + + //sint32 crfD, frA, frB; + //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); + //crfD >>= 2; + //IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + //IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + + //IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + //IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + //IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + //IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo - set fpscr + + //sint32 crfD, frA, frB; + //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); + //crfD >>= 2; + //uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + //uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + //PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, 
PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; } @@ -930,9 +946,21 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 crfD, frA, frB; PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); crfD >>= 2; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); + IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo: set fpscr + return true; } @@ -1837,6 +1865,9 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPO0: Not implemented\n"); + return false; + sint32 crfD, frA, frB; uint32 c=0; frB = (opcode>>11)&0x1F; @@ -1851,6 +1882,9 @@ bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 o bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPU0: Not implemented\n"); + return false; + sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; @@ -1863,6 +1897,9 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPU1: Not implemented\n"); + return false; + sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index 7b4b94fbb..61be66aa1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -81,7 +81,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList.data() + 0); + entrySegment->imlList.data()[0].make_jump_new(); IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment); // remove enterable flag from original segment imlSegment->isEnterable = false; From a1c8f6fd3725f4d2e630247eb27cb23c083f9714 Mon Sep 17 00:00:00 2001 From: Exzap 
<13877693+Exzap@users.noreply.github.com> Date: Tue, 3 Jan 2023 06:18:22 +0100 Subject: [PATCH 28/64] PPCRec: Refactoring and clean up --- .../Recompiler/BackendX64/BackendX64.cpp | 13 - .../Recompiler/IML/IMLInstruction.cpp | 4 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 26 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 2 +- .../Recompiler/PPCRecompilerImlGen.cpp | 735 ++++++++---------- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 76 +- .../Recompiler/PPCRecompilerIntermediate.cpp | 2 +- 7 files changed, 383 insertions(+), 475 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 4fb3aa46d..049b7345b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -1049,19 +1049,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, if( rRegResult != X86_REG_RDX ) x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); } - else if( imlInstruction->operation == PPCREC_IML_OP_ORC ) - { - // registerResult = registerOperand1 | ~registerOperand2 - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - if( rRegResult != rRegOperand1 ) - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP); - } else { debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index b7e2294cc..eedbb1eb2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -26,7 +26,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_R_R) { - if (operation == PPCREC_IML_OP_COMPARE_SIGNED || operation == PPCREC_IML_OP_COMPARE_UNSIGNED || operation == PPCREC_IML_OP_DCBZ) + if (operation == PPCREC_IML_OP_DCBZ) { // both operands are read only registersUsed->readNamedReg1 = op_r_r.registerResult; @@ -60,7 +60,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_R_S32) { - if (operation == PPCREC_IML_OP_COMPARE_SIGNED || operation == PPCREC_IML_OP_COMPARE_UNSIGNED || operation == PPCREC_IML_OP_MTCRF) + if (operation == PPCREC_IML_OP_MTCRF) { // operand register is read only registersUsed->readNamedReg1 = op_r_immS32.registerIndex; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 08955b39c..39803fbff 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -4,8 +4,6 @@ enum { PPCREC_IML_OP_ASSIGN, // '=' operator PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap - PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr) - PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr) PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' 
operator (signed multiply) PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result @@ -14,7 +12,6 @@ enum // binary operation PPCREC_IML_OP_OR, // '|' operator - PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first PPCREC_IML_OP_AND, // '&' operator PPCREC_IML_OP_XOR, // '^' operator PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator @@ -193,12 +190,6 @@ enum #define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead -// deprecated, use Espresso namespace -#define PPCREC_CR_BIT_LT 0 -#define PPCREC_CR_BIT_GT 1 -#define PPCREC_CR_BIT_EQ 2 -#define PPCREC_CR_BIT_SO 3 - enum { // fpr load @@ -519,10 +510,8 @@ struct IMLInstruction this->operation = 0; } - - void make_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + void make_r_r(uint32 operation, uint8 registerResult, uint8 registerA) { - // operation with two register operands (e.g. "t0 = t1") this->type = PPCREC_IML_TYPE_R_R; this->operation = operation; this->op_r_r.registerResult = registerResult; @@ -530,7 +519,7 @@ struct IMLInstruction } - void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint32 crMode = 0) + void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32) { this->type = PPCREC_IML_TYPE_R_S32; this->operation = operation; @@ -538,9 +527,8 @@ struct IMLInstruction this->op_r_immS32.immS32 = immS32; } - void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB) { - // operation with three register operands (e.g. "t0 = t1 + t4") this->type = PPCREC_IML_TYPE_R_R_R; this->operation = operation; this->op_r_r_r.registerResult = registerResult; @@ -558,9 +546,8 @@ struct IMLInstruction this->op_r_r_r_carry.regCarry = registerCarry; } - void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32) { - // operation with two register operands and one signed immediate (e.g. 
"t0 = t1 + 1234") this->type = PPCREC_IML_TYPE_R_R_S32; this->operation = operation; this->op_r_r_s32.registerResult = registerResult; @@ -598,7 +585,7 @@ struct IMLInstruction this->op_compare_s32.cond = cond; } - void make_conditional_jump_new(uint8 registerBool, bool mustBeTrue) + void make_conditional_jump(uint8 registerBool, bool mustBeTrue) { this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; this->operation = -999; @@ -606,7 +593,7 @@ struct IMLInstruction this->op_conditionalJump2.mustBeTrue = mustBeTrue; } - void make_jump_new() + void make_jump() { this->type = PPCREC_IML_TYPE_JUMP; this->operation = -999; @@ -660,7 +647,6 @@ struct IMLInstruction void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; - //void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); void RewriteGPR(const std::unordered_map& translationTable); void ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]); void ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index d1475ffe4..fc783b8a0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -15,7 +15,7 @@ void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint); // GPR register management -uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); +uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // FPR register management diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 2a1f2c716..42c0c3bb6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -7,6 +7,8 @@ #include "IML/IMLRegisterAllocatorRanges.h" #include "PPCFunctionBoundaryTracker.h" +bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); + struct PPCBasicBlockInfo { PPCBasicBlockInfo(uint32 startAddress, const std::set& entryAddresses) : startAddress(startAddress), lastAddress(startAddress) @@ -23,7 +25,7 @@ struct PPCBasicBlockInfo // associated IML segments IMLSegment* firstSegment{}; // first segment in chain, used as branch target for other segments - IMLSegment* appendSegment{}; // last segment in chain, new instructions should be appended to this segment + IMLSegment* appendSegment{}; // last segment in chain, additional instructions should be appended to this segment void SetInitialSegment(IMLSegment* seg) { @@ -44,9 +46,6 @@ struct PPCBasicBlockInfo } }; -bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); -uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); - IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) { IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); @@ -54,24 +53,6 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext return &inst; } -void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 
operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) -{ - if (imlInstruction) - __debugbreak(); // not supported - - ppcImlGenContext->emitInst().make_r_r(operation, registerResult, registerA, crRegister, crMode); -} - -void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name) -{ - // Store name (e.g. "'r3' = t0" which translates to MOV [ESP+offset_r3], reg32) - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_NAME_R; - imlInstruction->operation = operation; - imlInstruction->op_r_name.registerIndex = registerIndex; - imlInstruction->op_r_name.name = name; -} - void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { if(imlInstruction == NULL) @@ -89,14 +70,8 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet; } -void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) -{ - ppcImlGenContext->emitInst().make_r_memory(registerDestination, registerMemory, immS32, copyWidth, signExtend, switchEndian); -} - void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { - // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED; imlInstruction->operation = 0; @@ -108,14 +83,8 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } -void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) -{ - ppcImlGenContext->emitInst().make_memory_r(registerSource, registerMemory, immS32, copyWidth, switchEndian); -} - void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { - // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED; imlInstruction->operation = 0; @@ -127,7 +96,6 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } - // create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards template void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken) @@ -154,7 +122,7 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P ppcImlGenContext.currentOutputSegment = 
segBranchNotTaken; genSegmentBranchNotTaken(ppcImlGenContext); cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken); - ppcImlGenContext.emitInst().make_jump_new(); + ppcImlGenContext.emitInst().make_jump(); // make merge segment the new write segment ppcImlGenContext.currentOutputSegment = segMerge; basicBlockInfo.appendSegment = segMerge; @@ -224,14 +192,12 @@ uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcIm * Loads a PPC gpr into any of the available IML registers * If loadNew is false, it will reuse already loaded instances */ -uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) +uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - if( loadNew == false ) - { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; - } + uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); + if (loadedRegisterIndex != PPC_REC_INVALID_REGISTER) + return loadedRegisterIndex; + uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName); return registerIndex; } @@ -249,18 +215,37 @@ uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenCo return registerIndex; } -// get throw-away register. Only valid for the scope of a single translated instruction -// be careful to not collide with manually loaded temporary register -uint32 PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext_t* ppcImlGenContext, uint32 temporaryIndex) +IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) +{ + cemu_assert_debug(index < 32); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + index); +} + +IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { - return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + temporaryIndex); + cemu_assert_debug(index < 32); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + index); } -IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit) +IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit) { cemu_assert_debug(crReg < 8); cemu_assert_debug(crBit < 4); - return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + (crReg * 4) + (crBit)); + return _GetRegCR(ppcImlGenContext, (crReg * 4) + crBit); +} + +IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index) +{ + cemu_assert_debug(index < 4); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); +} + +// get throw-away register. 
Only valid for the scope of a single translated instruction +// be careful to not collide with manually loaded temporary register +uint32 _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) +{ + cemu_assert_debug(index < 4); + return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); } /* @@ -366,11 +351,11 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin } // for handling RC bit of many instructions -void PPCImlGen_UpdateCR0Logical(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR) +void PPCImlGen_UpdateCR0(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR) { - IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_LT); - IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT); - IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ); // todo - SO bit? ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegLT, IMLCondition::SIGNED_LT); @@ -398,19 +383,14 @@ bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 rD, spr1, spr2, spr; PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); + IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD); if (spr == SPR_CTR || spr == SPR_LR) { - uint32 gprReg = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (gprReg == PPC_REC_INVALID_REGISTER) - gprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); } else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) { - uint32 gprReg = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (gprReg == PPC_REC_INVALID_REGISTER) - gprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true; @@ -425,16 +405,15 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 rD, spr1, spr2, spr; PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); + IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD); if (spr == SPR_LR || spr == SPR_CTR) { uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); } else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) { uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); } else @@ -450,7 +429,7 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 rD, spr1, spr2, spr; 
PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); - + if (spr == 268 || spr == 269) { // TBL / TBU @@ -495,14 +474,14 @@ void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode PPC_OPC_TEMPL_X(opcode, cr, rA, rB); cr >>= 2; - IMLReg gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegisterA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg gprRegisterB = _GetRegGPR(ppcImlGenContext, rB); IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); - IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); - IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); - IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); - IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegGT, isUnsigned ? IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); @@ -525,13 +504,13 @@ bool PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod } cr >>= 2; - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); - IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); - IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); - IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); - IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegGT, isUnsigned ? IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); @@ -558,7 +537,7 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) } // is jump destination within recompiled function? 
if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) - ppcImlGenContext->emitInst().make_jump_new(); + ppcImlGenContext->emitInst().make_jump(); else ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; @@ -584,7 +563,7 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) IMLReg regCRBit; if (!ignoreCondition) - regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit); uint32 jumpAddressDest = BD; if( (opcode&PPC_OPC_AA) == 0 ) @@ -601,7 +580,7 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue); blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; } @@ -612,11 +591,11 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { if( ignoreCondition == false ) return false; // not supported for the moment - IMLReg ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); - IMLReg tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); + IMLReg ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR); + IMLReg tmpBoolReg = _GetRegTemporaryS8(ppcImlGenContext, 1); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1); ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? 
IMLCondition::EQ : IMLCondition::NEQ); - ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true); + ppcImlGenContext->emitInst().make_conditional_jump(tmpBoolReg, true); return true; } else @@ -632,7 +611,7 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue); } else { @@ -659,7 +638,7 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regCRBit; if (!BO.conditionIgnore()) - regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit); uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); if (LK) @@ -688,7 +667,7 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, !BO.conditionInverted()); + ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, !BO.conditionInverted()); bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); } else @@ -718,13 +697,12 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regD, regA, regB); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -733,22 +711,16 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //hCPU->gpr[rD] = (rA ? 
(int)hCPU->gpr[rA] : 0) + (int)imm; + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, imm); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regD, regA, imm); } else { - // rA not used, instruction is value assignment - // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, imm); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, imm); } - // never updates any cr return true; } @@ -757,21 +729,16 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rD, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm); } else { - // rA not used, instruction turns into simple value assignment - // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, (sint32)imm); } - // never updates any cr return true; } @@ -780,13 +747,13 @@ bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // r = a + b -> update carry sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); - IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD, regRD, regRA, regRB, regCa); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); return true; } @@ -795,12 +762,12 @@ bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = 
_GetRegGPR(ppcImlGenContext, rD); IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm, regCa); if(updateCR0) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -809,13 +776,13 @@ bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // r = a + b + carry -> update carry sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); - IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, regRB, regCa); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); return true; } @@ -824,12 +791,12 @@ bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // r = a + carry -> update carry sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, 0, regCa); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); return true; } @@ -838,12 +805,12 @@ bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // r = a + 0xFFFFFFFF + carry -> update carry sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, -1, regCa); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); return true; } @@ -852,12 +819,12 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); // rD = ~rA + rB + 1 - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); - IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = 
_GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, regD, regB, regA); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -866,15 +833,15 @@ bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // d = ~a + b + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -883,14 +850,14 @@ bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opc // d = ~a + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, 0, regCa); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -899,16 +866,16 @@ bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // d = ~a + b + 1; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); - IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1 ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, 
regD, regTmp, regB, regCa); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -918,13 +885,12 @@ bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regTmp, (sint32)imm + 1, regCa); - // never affects CR0 return true; } @@ -933,9 +899,8 @@ bool PPCRecompilerImlGen_MULLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // mulli instruction does not modify any flags - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); + uint32 registerOperand = _GetRegGPR(ppcImlGenContext, rA); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand, (sint32)imm); return true; } @@ -945,16 +910,16 @@ bool PPCRecompilerImlGen_MULLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); //hCPU->gpr[rD] = hCPU->gpr[rA] * hCPU->gpr[rB]; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); + uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); + uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); if (opcode & PPC_OPC_OE) { return false; } ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); return true; } @@ -962,13 +927,12 @@ bool PPCRecompilerImlGen_MULHW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = ((sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB])>>32; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); + uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); + uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); 
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); return true; } @@ -976,13 +940,12 @@ bool PPCRecompilerImlGen_MULHWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = (hCPU->gpr[rA] * hCPU->gpr[rB])>>32; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); + uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); + uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); return true; } @@ -990,13 +953,12 @@ bool PPCRecompilerImlGen_DIVW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = (sint32)a / (sint32)b; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); + uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); + uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); return true; } @@ -1005,12 +967,12 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); // hCPU->gpr[rD] = (uint32)a / (uint32)b; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); + uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); + uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); return true; } @@ -1020,7 +982,7 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); uint32 mask = ppc_mask(MB, ME); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, 
false); + uint32 registerRS = _GetRegGPR(ppcImlGenContext, rS); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( ME == (31-SH) && MB == 0 ) { @@ -1043,7 +1005,7 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); } if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1052,13 +1014,13 @@ bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opc int rS, rA, SH, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // pack RLWIMI parameters into single integer uint32 vImm = MB|(ME<<8)|(SH<<16); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1067,25 +1029,25 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA, rB, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME); uint32 mask = ppc_mask(MB, ME); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); if( mask != 0xFFFFFFFF ) ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - // unlike SRAWI, for SRAW the shift range is 0-63 (6 bits) + // unlike SRAWI, for SRAW the shift range is 0-63 (masked to 6 bits) // but only shifts up to register bitwidth-1 are well defined in IML so this requires special handling for shifts >= 32 sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); @@ -1097,7 +1059,7 @@ bool 
PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load masked shift factor into temporary register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmpShiftAmount, registerRB, 0x3F); ppcImlGenContext->emitInst().make_compare_s32(registerTmpShiftAmount, 32, registerTmpCondBool, IMLCondition::UNSIGNED_GT); - ppcImlGenContext->emitInst().make_conditional_jump_new(registerTmpCondBool, true); + ppcImlGenContext->emitInst().make_conditional_jump(registerTmpCondBool, true); PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, [&](ppcImlGenContext_t& genCtx) @@ -1129,8 +1091,8 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_X(opcode, rS, rA, SH); cemu_assert_debug(SH < 32); if (SH == 0) - return false; // becomes a no-op but also sets ca bit to 0? - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS, false); + return false; // becomes a no-op (unless RC bit is set) but also sets ca bit to 0? + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); uint32 registerTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); @@ -1141,9 +1103,8 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco ppcImlGenContext->emitInst().make_compare_s32(registerTmp, 0, registerCarry, IMLCondition::NEQ); // ca = (testValue != 0) // do the actual shift ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, (sint32)SH); - - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1152,12 +1113,12 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1166,12 +1127,12 @@ bool PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, 
PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1180,11 +1141,11 @@ bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); PPC_ASSERT(rB==0); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1192,11 +1153,11 @@ bool PPCRecompilerImlGen_EXTSB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1205,11 +1166,11 @@ bool PPCRecompilerImlGen_CNTLZW(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); PPC_ASSERT(rB==0); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, registerRA, registerRS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; } @@ -1219,11 +1180,11 @@ bool PPCRecompilerImlGen_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); PPC_ASSERT(rB == 0); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, registerRD, registerRA); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + PPCImlGen_UpdateCR0(ppcImlGenContext, registerRD); return true; } @@ -1239,13 +1200,13 @@ void PPCRecompilerImlGen_LWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, 
PPCREC_NAME_R0+rA); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 32, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 32, false, true); } void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1260,7 +1221,7 @@ void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded @@ -1268,7 +1229,7 @@ void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 32, false, true); } void PPCRecompilerImlGen_LHA(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1283,13 +1244,13 @@ void PPCRecompilerImlGen_LHA(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 16, true, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 16, true, true); } void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1304,7 +1265,7 @@ void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded @@ -1312,7 +1273,7 @@ void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( destinationRegister == 
PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, true, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 16, true, true); } void PPCRecompilerImlGen_LHZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1328,13 +1289,13 @@ void PPCRecompilerImlGen_LHZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 16, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 16, false, true); } void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1349,7 +1310,7 @@ void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded @@ -1357,7 +1318,7 @@ void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 16, false, true); } void PPCRecompilerImlGen_LBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1372,13 +1333,13 @@ void PPCRecompilerImlGen_LBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 8, false, true); + 
ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 8, false, true); } void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1393,7 +1354,7 @@ void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // check if destination register is already loaded @@ -1401,7 +1362,7 @@ void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 8, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 8, false, true); } bool PPCRecompilerImlGen_LWZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1414,8 +1375,8 @@ bool PPCRecompilerImlGen_LWZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod } // hCPU->gpr[rD] = memory_readU8((rA?hCPU->gpr[rA]:0)+hCPU->gpr[rB]); // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1434,8 +1395,8 @@ bool PPCRecompilerImlGen_LWZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1443,7 +1404,7 @@ bool PPCRecompilerImlGen_LWZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // add rB to rA ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 32, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 32, false, true); return true; } @@ -1454,8 +1415,8 @@ bool PPCRecompilerImlGen_LWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // load memory rA and rB into register uint32 
gprRegisterA = 0; if( rA ) - gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); if (destinationRegister == PPC_REC_INVALID_REGISTER) @@ -1464,7 +1425,7 @@ bool PPCRecompilerImlGen_LWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( rA ) PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, false); else - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 32, false, false); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterB, 0, 32, false, false); return true; } @@ -1479,8 +1440,8 @@ bool PPCRecompilerImlGen_LHAX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return true; } // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1501,8 +1462,8 @@ bool PPCRecompilerImlGen_LHAUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1510,7 +1471,7 @@ bool PPCRecompilerImlGen_LHAUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // add rB to rA ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load half word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 16, true, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 16, true, true); return true; } @@ -1525,8 +1486,8 @@ bool PPCRecompilerImlGen_LHZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return true; } // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1547,8 +1508,8 @@ bool PPCRecompilerImlGen_LHZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1556,7 +1517,7 @@ bool PPCRecompilerImlGen_LHZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // add rB to rA ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load hald word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 16, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 16, false, true); return true; } @@ -1565,15 +1526,15 @@ void PPCRecompilerImlGen_LHBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rA, rD, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); // load memory rA and rB into register - uint32 gprRegisterA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false) : 0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + uint32 gprRegisterA = rA != 0 ? 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : 0; + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); if (destinationRegister == PPC_REC_INVALID_REGISTER) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register // load half word (little-endian) if (rA == 0) - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 16, false, false); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterB, 0, 16, false, false); else PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, false, false); } @@ -1587,10 +1548,9 @@ bool PPCRecompilerImlGen_LBZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // special case where rA is ignored and only rB is used return false; } - // hCPU->gpr[rD] = memory_readU8((rA?hCPU->gpr[rA]:0)+hCPU->gpr[rB]); // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1611,8 +1571,8 @@ bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); if (destinationRegister == PPC_REC_INVALID_REGISTER) @@ -1620,7 +1580,7 @@ bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // add rB to rA ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 8, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 8, false, true); return true; } @@ -1634,13 +1594,13 @@ void PPCRecompilerImlGen_LMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode while( rD <= 31 ) { // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // check if destination register is already loaded uint32 destinationRegister = 
PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register // load word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm+index*4, 32, false, true); + ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm+index*4, 32, false, true); // next rD++; index++; @@ -1660,11 +1620,11 @@ void PPCRecompilerImlGen_STW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister + uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 32, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm, 32, true); } void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1680,14 +1640,14 @@ void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod } // store&update instructions where rD==rA store the register contents without added imm, therefore we need to handle it differently // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister + uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); @@ -1705,11 +1665,11 @@ void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister + uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister // load half - 
PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 16, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm, 16, true); } void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1724,14 +1684,14 @@ void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return; } // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister + uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); @@ -1749,11 +1709,11 @@ void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return; } // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister + uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // can be the same as gprRegister // store byte - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 8, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm, 8, true); } void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1768,14 +1728,14 @@ void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return; } // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister + uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // store byte - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) 
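// --- Editor's note (not part of the patch): a minimal standalone sketch of why the
// --- store-with-update emitters above order the address update before or after the
// --- store depending on whether the source register equals the address register.
// --- Assumes a simplified big-endian guest memory; all names below are hypothetical.
#include <cstdint>

static void writeU32BE(uint8_t* mem, uint32_t ea, uint32_t value)
{
    // store a 32-bit value in big-endian byte order, mirroring guest memory layout
    for (int i = 0; i < 4; i++)
        mem[ea + i] = (uint8_t)(value >> (24 - i * 8));
}

// STWU rS, imm(rA): EA = rA + imm, store rS at EA, then rA = EA.
// If rS != rA the update can happen first and the store uses offset 0 (same EA, same value).
// If rS == rA the value to store must be the old register contents, so the store (at
// offset imm) has to come first and the update last - the same ordering emitted above.
static void stwuReference(uint8_t* mem, uint32_t gpr[32], int rS, int rA, int32_t imm)
{
    if (rS != rA)
    {
        gpr[rA] += (uint32_t)imm;           // early update
        writeU32BE(mem, gpr[rA], gpr[rS]);  // store at offset 0
    }
    else
    {
        writeU32BE(mem, gpr[rA] + (uint32_t)imm, gpr[rS]); // store the old value
        gpr[rA] += (uint32_t)imm;           // late update
    }
}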
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); @@ -1789,13 +1749,13 @@ bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uin // prepare registers uint32 gprRegisterA; if(rA != 0) - gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // store word if (rA == 0) { - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, storeBitWidth, !byteReversed); + ppcImlGenContext->emitInst().make_memory_r(destinationRegister, gprRegisterB, 0, storeBitWidth, !byteReversed); } else PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, !byteReversed); @@ -1814,8 +1774,8 @@ bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenConte if( rS == rA || rS == rB ) { // prepare registers - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, true); @@ -1824,13 +1784,13 @@ bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenConte return true; } // prepare registers - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 sourceRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // update EA ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegisterA, 0, storeBitWidth, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegisterA, 0, storeBitWidth, true); return true; } @@ -1839,14 +1799,14 @@ bool PPCRecompilerImlGen_STWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 rA, rS, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):0; + uint32 
gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // store word if( rA != 0 ) PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, false); else - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 32, false); + ppcImlGenContext->emitInst().make_memory_r(destinationRegister, gprRegisterB, 0, 32, false); return true; } @@ -1860,11 +1820,11 @@ void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod while( rS <= 31 ) { // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister + uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // can be the same as gprRegister // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm+index*4, 32, true); + ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm+index*4, 32, true); // next rS++; index++; @@ -1886,15 +1846,15 @@ bool PPCRecompilerImlGen_LSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // potential optimization: On x86 unaligned access is allowed and we could handle the case nb==4 with a single memory read, and nb==2 with a memory read and shift - uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg memReg = _GetRegGPR(ppcImlGenContext, rA); + IMLReg tmpReg = _GetRegTemporary(ppcImlGenContext, 0); uint32 memOffset = 0; while (nb > 0) { if (rD == rA) return false; cemu_assert(rD < 32); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg destinationRegister = _GetRegGPR(ppcImlGenContext, rD); // load bytes one-by-one for (sint32 b = 0; b < 4; b++) { @@ -1920,15 +1880,15 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( nb == 0 ) nb = 32; - uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg memReg = _GetRegGPR(ppcImlGenContext, rA); + IMLReg tmpReg = _GetRegTemporary(ppcImlGenContext, 0); uint32 memOffset = 0; while (nb > 0) { if (rS == rA) return false; cemu_assert(rS < 32); - uint32 dataRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg dataRegister = _GetRegGPR(ppcImlGenContext, rS); // store bytes one-by-one for (sint32 b = 0; b < 4; b++) { @@ -1984,11 +1944,11 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco else ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB); // get CR bit regs and set LT, GT and SO immediately - IMLReg regCrLT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_LT); - IMLReg regCrGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_GT); - IMLReg 
regCrEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_EQ); - IMLReg regCrSO = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); - IMLReg regXerSO = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + IMLReg regCrLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_LT); + IMLReg regCrGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_GT); + IMLReg regCrEQ = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_EQ); + IMLReg regCrSO = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + IMLReg regXerSO = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrLT, 0); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrGT, 0); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrSO, regXerSO); @@ -1998,7 +1958,7 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // compare calculated EA with reservation IMLReg regTmpBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); ppcImlGenContext->emitInst().make_compare(regCalcEA, regMemResEA, regTmpBool, IMLCondition::EQ); - ppcImlGenContext->emitInst().make_conditional_jump_new(regTmpBool, true); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpBool, true); PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, [&](ppcImlGenContext_t& genCtx) @@ -2018,9 +1978,8 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // reset reservation // I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required // Most sources state that it is cleared on successful store. 
They don't explicitly mention what happens on failure - // In contrast, "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared - // There may also be differences between individual PPC generations - // In disassembly I have never seen more than one STWCX after each LWARX, which hints at reservation always being cleared or at least the compiler assuming this + // "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared + // There may also be different behavior between individual PPC generations ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0); @@ -2033,8 +1992,8 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod rA = (opcode>>16)&0x1F; rB = (opcode>>11)&0x1F; // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):0; + uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // store if( rA != 0 ) ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB); @@ -2053,21 +2012,21 @@ bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) // simple register copy if( rA != rS ) // check if no-op { - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); } if ((opcode & PPC_OPC_RC)) { sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } } else { // rA = rS | rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + sint32 gprSource1Reg = _GetRegGPR(ppcImlGenContext, rS); + sint32 gprSource2Reg = _GetRegGPR(ppcImlGenContext, rB); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) { @@ -2086,7 +2045,7 @@ bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); } if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } return true; } @@ -2106,7 +2065,7 @@ bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } else { @@ -2127,7 +2086,7 @@ bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, 
uint32 opcode } ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } else { @@ -2140,7 +2099,7 @@ bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } } return true; @@ -2150,13 +2109,15 @@ bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // hCPU->gpr[rA] = hCPU->gpr[rS] | ~hCPU->gpr[rB]; - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + // rA = rS | ~rB; + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + sint32 regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regTmp); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -2170,11 +2131,11 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode // simple register copy if( rA != rS ) // check if no-op { - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } else { @@ -2184,8 +2145,8 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode else { // rA = rS & rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + sint32 gprSource1Reg = _GetRegGPR(ppcImlGenContext, rS); + sint32 gprSource2Reg = _GetRegGPR(ppcImlGenContext, rB); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) { @@ -2199,7 +2160,7 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource1Reg); } if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } else { @@ -2211,7 +2172,7 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode // rA &= 
rB ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } } return true; @@ -2229,25 +2190,25 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } else if( rA == rB ) { // rB already in rA, therefore we complement rA first and then AND it with rS - sint32 gprRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprRS = _GetRegGPR(ppcImlGenContext, rS); + sint32 gprDestReg = _GetRegGPR(ppcImlGenContext, rA); // rA = ~rA ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); // rA &= rS ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprRS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } else { // a & (~b) is the same as ~((~a) | b) - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - sint32 gprRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + sint32 gprDestReg = _GetRegGPR(ppcImlGenContext, rA); + sint32 gprRB = _GetRegGPR(ppcImlGenContext, rB); sint32 gprRS = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // move rS to rA (if required) if( gprDestReg != gprRS ) @@ -2262,7 +2223,7 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // rA = ~rA ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } return true; } @@ -2272,7 +2233,7 @@ void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) @@ -2280,7 +2241,7 @@ void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // rA &= imm32 ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, gprDestReg, (sint32)imm); // ANDI. 
always sets cr0 - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2288,7 +2249,7 @@ void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) @@ -2296,30 +2257,26 @@ void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // rA &= imm32 ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, gprDestReg, (sint32)imm); // ANDIS. always sets cr0 - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); } bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - IMLReg gprDestReg; + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); if( rS == rB ) { - // xor register with itself - gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, 0); } else { - // rA = rS ^ rA - IMLReg gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - IMLReg gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg, gprSource2Reg); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regA, regS, regB); } if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -2328,43 +2285,21 @@ bool PPCRecompilerImlGen_EQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); if( rS == rB ) { - // xor register with itself, then invert - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, -1); } else { - // rA = ~(rS ^ rA) - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, 
gprSource2Reg); - else - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA ^= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); + // rA = ~(rS ^ rB) + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regA, regS, regB); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); } + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -2373,9 +2308,9 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, regA, regS, (sint32)imm); } void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2383,9 +2318,9 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, regA, regS, (sint32)imm); } void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2393,9 +2328,9 @@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regA, regS, (sint32)imm); } void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2403,18 +2338,18 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - 
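// --- Editor's note (not part of the patch): the bitwise identities behind the
// --- simplified XOR/EQV/ORC/ANDC emission in the hunks above, checked in a tiny
// --- standalone snippet. Only standard C++ is used here; nothing below is recompiler API.
#include <cassert>
#include <cstdint>

static void checkLogicalIdentities(uint32_t x, uint32_t s, uint32_t b)
{
    assert((x ^ x) == 0u);              // XOR rA,rS,rS  -> rA = 0
    assert(~(x ^ x) == 0xFFFFFFFFu);    // EQV rA,rS,rS  -> rA = -1 (all bits set)
    assert((s | ~b) == ~(~s & b));      // ORC: rS | ~rB, lowered as NOT into a temp, then OR
    assert((s & ~b) == ~(~s | b));      // ANDC: "a & (~b) is the same as ~((~a) | b)"
}
// e.g. checkLogicalIdentities(0x1234u, 0xF0F0F0F0u, 0x0FF00FF0u);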
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regA, regS, (sint32)imm); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); - IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); - IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regCrB); return true; } @@ -2423,9 +2358,9 @@ bool PPCRecompilerImlGen_CRORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); - IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); - IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regTmp); @@ -2436,9 +2371,9 @@ bool PPCRecompilerImlGen_CRAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); - IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); - IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regCrB); return true; } @@ -2447,9 +2382,9 @@ bool PPCRecompilerImlGen_CRANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); - IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); - IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regTmp); @@ -2460,9 +2395,9 @@ bool PPCRecompilerImlGen_CRXOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int 
crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); - IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); - IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); if (regCrA == regCrB) { ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 0); @@ -2476,9 +2411,9 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); - IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); - IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); if (regCrA == regCrB) { ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 1); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index b8986db41..ac0a46bd9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -4,7 +4,7 @@ #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" -IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); +IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { @@ -136,7 +136,7 @@ bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) @@ -156,7 +156,7 @@ bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index @@ -182,8 +182,8 @@ bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegister1 = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) @@ -207,8 +207,8 @@ bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // add rB to rA (if rA != 0) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); // get fpr register index @@ -234,7 +234,7 @@ bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode assert_dbg(); } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); @@ -251,7 +251,7 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod assert_dbg(); } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index @@ -271,8 +271,8 @@ bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); @@ -289,8 +289,8 @@ bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // add rB to rA (if rA != 
0) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); // get fpr register index @@ -305,7 +305,7 @@ bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); @@ -319,7 +319,7 @@ bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index @@ -339,8 +339,8 @@ bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( ppcImlGenContext->LSQE ) @@ -365,8 +365,8 @@ bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); // calculate EA in rA @@ -387,7 +387,7 @@ bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); @@ -405,7 +405,7 @@ bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory 
register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index @@ -425,8 +425,8 @@ bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( ppcImlGenContext->LSQE ) @@ -449,13 +449,13 @@ bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc uint32 gprRegister2; if( rA != 0 ) { - gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); } else { // rA is not used - gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); gprRegister2 = 0; } // get fpr register index @@ -949,10 +949,10 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); - IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); - IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); - IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); @@ -1112,9 +1112,9 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool readPS1 = (opcode & 0x8000) == false; // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); + uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); // get fpr register index uint32 fprRegister = 
PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // psq load @@ -1138,9 +1138,9 @@ bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc bool readPS1 = (opcode & 0x8000) == false; // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); + uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index @@ -1163,9 +1163,9 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc bool storePS1 = (opcode & 0x8000) == false; // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); + uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired store @@ -1189,9 +1189,9 @@ bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 op bool storePS1 = (opcode & 0x8000) == false; // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); + uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index 61be66aa1..0a87a1e7b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -81,7 +81,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1); - entrySegment->imlList.data()[0].make_jump_new(); + entrySegment->imlList.data()[0].make_jump(); IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment); // remove enterable flag from original segment imlSegment->isEnterable = false; From b4f2e02e559d58db78befcda6f043818acbe34fb Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 3 Jan 2023 09:13:40 +0100 Subject: [PATCH 29/64] PPCRec: Refactor load/store instructions --- .../Espresso/Recompiler/IML/IMLInstruction.h | 1 - 
.../HW/Espresso/Recompiler/PPCRecompilerIml.h | 1 - .../Recompiler/PPCRecompilerImlGen.cpp | 918 ++++-------------- 3 files changed, 209 insertions(+), 711 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 39803fbff..f8e5a646a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -518,7 +518,6 @@ struct IMLInstruction this->op_r_r.registerA = registerA; } - void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32) { this->type = PPCREC_IML_TYPE_R_S32; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index fc783b8a0..d951fb1d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -16,7 +16,6 @@ void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint); // GPR register management uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); -uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // FPR register management uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 42c0c3bb6..539f075a1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -188,10 +188,6 @@ uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcIm return PPC_REC_INVALID_REGISTER; } -/* - * Loads a PPC gpr into any of the available IML registers - * If loadNew is false, it will reuse already loaded instances - */ uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); @@ -202,19 +198,6 @@ uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, ui return registerIndex; } -/* - * Reuse already loaded register if present - * Otherwise create new IML register and map the name. 
The register contents will be undefined - */ -uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) -{ - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName); - return registerIndex; -} - IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { cemu_assert_debug(index < 32); @@ -245,7 +228,7 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index) uint32 _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { cemu_assert_debug(index < 4); - return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); } /* @@ -386,12 +369,12 @@ bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD); if (spr == SPR_CTR || spr == SPR_LR) { - uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); } else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) { - uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true; } @@ -647,11 +630,11 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { // if the branch target is LR, then preserve it in a temporary cemu_assert_suspicious(); // this case needs testing - uint32 tmpRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + uint32 tmpRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg); branchDestReg = tmpRegister; } - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); + uint32 registerLR = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } @@ -676,20 +659,17 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); - } return true; } bool PPCRecompilerImlGen_ISYNC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - // does not need to be translated return true; } bool PPCRecompilerImlGen_SYNC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - // does not need to be translated return true; } @@ -750,7 +730,7 @@ bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); IMLReg regRB = 
_GetRegGPR(ppcImlGenContext, rB); IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); - IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD, regRD, regRA, regRB, regCa); if (opcode & PPC_OPC_RC) PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); @@ -764,7 +744,7 @@ bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); - IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm, regCa); if(updateCR0) PPCImlGen_UpdateCR0(ppcImlGenContext, regD); @@ -836,7 +816,7 @@ bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); - IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); @@ -852,7 +832,7 @@ bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); - IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, 0, regCa); @@ -869,8 +849,8 @@ bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); - IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); - IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1 ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); @@ -887,8 +867,8 @@ bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); 
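// Sketch of the carry convention used by the SUBF* hunks here (illustrative note,
// not part of the patch): PowerPC "subtract from" computes rD = rB - rA, and in
// two's complement rB - rA == ~rA + rB + 1. That identity is why the IML emits
// NOT(rA) followed by an add-with-carry:
//   subfc : CA is preset to 1, then rD = ~rA + rB + CA   (the +1 enters via CA)
//   subfe : rD = ~rA + rB + CA                            (CA is the incoming carry)
//   subfic: rD = ~rA + (imm + 1), CA = carry out          (the +1 is folded into imm)
// Worked example in 8 bits: 5 - 3 -> ~3 + 5 + 1 = 0xFC + 0x05 + 0x01 = 0x102,
// i.e. result 0x02 with carry out 1 (CA = 1 means "no borrow" on PPC).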
IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); - IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); - IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regTmp, (sint32)imm + 1, regCa); return true; @@ -983,7 +963,7 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc uint32 mask = ppc_mask(MB, ME); uint32 registerRS = _GetRegGPR(ppcImlGenContext, rS); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( ME == (31-SH) && MB == 0 ) { // SLWI @@ -1015,7 +995,7 @@ bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // pack RLWIMI parameters into single integer uint32 vImm = MB|(ME<<8)|(SH<<16); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm); @@ -1031,7 +1011,7 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 mask = ppc_mask(MB, ME); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); if( mask != 0xFFFFFFFF ) ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); @@ -1048,13 +1028,13 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, rS, rA, rB); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); - uint32 registerTmpShiftAmount = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); - uint32 registerTmpCondBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); - uint32 registerTmp1 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); - uint32 registerTmp2 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, 
PPCREC_NAME_TEMPORARY + 3); + uint32 registerTmpShiftAmount = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 registerTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + uint32 registerTmp1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + uint32 registerTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); // load masked shift factor into temporary register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmpShiftAmount, registerRB, 0x3F); @@ -1093,9 +1073,9 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if (SH == 0) return false; // becomes a no-op (unless RC bit is set) but also sets ca bit to 0? uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); - uint32 registerTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 registerCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + uint32 registerTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); // calculate CA first ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp, registerRS, 31); // signMask = input >> 31 (arithmetic shift) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp, registerTmp, registerRS); // testValue = input & signMask & ((1<emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1129,7 +1109,7 @@ bool PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1142,7 +1122,7 @@ bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_X(opcode, rS, rA, rB); PPC_ASSERT(rB==0); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1154,7 +1134,7 @@ bool PPCRecompilerImlGen_EXTSB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1167,7 +1147,7 @@ bool PPCRecompilerImlGen_CNTLZW(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_X(opcode, rS, rA, rB); PPC_ASSERT(rB==0); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, registerRA, registerRS); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1181,406 +1161,124 @@ bool PPCRecompilerImlGen_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode PPC_ASSERT(rB == 0); uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, registerRD, registerRA); if (opcode & PPC_OPC_RC) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRD); return true; } -void PPCRecompilerImlGen_LWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 32, false, true); -} - -void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == 
PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 32, false, true); -} - -void PPCRecompilerImlGen_LHA(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_LOAD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg) { int rA, rD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 16, true, true); -} - -void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 16, true, true); -} - -void PPCRecompilerImlGen_LHZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - // note: Darksiders 2 has this instruction form but it is never executed. 
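// Illustrative sketch (not part of the patch): the D-form load handlers removed in
// this hunk (LWZ/LWZU/LHA/LHAU/LHZ/LHZU/LBZ/LBZU) collapse into the single
// parameterized PPCRecompilerImlGen_LOAD added above, taking
// (bitWidth, signExtend, isBigEndian, updateAddrReg). The opcode dispatch table is
// outside this excerpt, so the mapping below is an assumed example only; the case
// values are the PowerPC primary opcodes and "ctx" stands for the
// ppcImlGenContext_t* argument.
static bool EmitLoadForPrimaryOpcode(ppcImlGenContext_t* ctx, uint32 primaryOpcode, uint32 opcode)
{
	switch (primaryOpcode)
	{
	case 32: return PPCRecompilerImlGen_LOAD(ctx, opcode, 32, false, true, false); // lwz
	case 33: return PPCRecompilerImlGen_LOAD(ctx, opcode, 32, false, true, true);  // lwzu
	case 34: return PPCRecompilerImlGen_LOAD(ctx, opcode, 8,  false, true, false); // lbz
	case 35: return PPCRecompilerImlGen_LOAD(ctx, opcode, 8,  false, true, true);  // lbzu
	case 40: return PPCRecompilerImlGen_LOAD(ctx, opcode, 16, false, true, false); // lhz
	case 41: return PPCRecompilerImlGen_LOAD(ctx, opcode, 16, false, true, true);  // lhzu
	case 42: return PPCRecompilerImlGen_LOAD(ctx, opcode, 16, true,  true, false); // lha
	case 43: return PPCRecompilerImlGen_LOAD(ctx, opcode, 16, true,  true, true);  // lhau
	default: return false; // other opcodes are handled elsewhere
	}
}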
- ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 16, false, true); -} - -void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 16, false, true); -} - -void PPCRecompilerImlGen_LBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) + IMLReg regMemAddr; + if (rA == 0) { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; + if (updateAddrReg) + return false; // invalid instruction form + regMemAddr = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemAddr, 0); } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load byte - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm, 8, false, true); -} - -void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); 
- if( rA == 0 ) + else { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; + if (updateAddrReg && rA == rD) + return false; // invalid instruction form + regMemAddr = _GetRegGPR(ppcImlGenContext, rA); } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load byte - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, 0, 8, false, true); -} - -bool PPCRecompilerImlGen_LWZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) + if (updateAddrReg) { - return false; + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regMemAddr, regMemAddr, (sint32)imm); + imm = 0; } - // hCPU->gpr[rD] = memory_readU8((rA?hCPU->gpr[rA]:0)+hCPU->gpr[rB]); - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load word - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, true); + IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_memory(regDst, regMemAddr, (sint32)imm, bitWidth, signExtend, isBigEndian); return true; } -bool PPCRecompilerImlGen_LWZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg) { sint32 rA, rD, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) + if (updateAddrReg && (rA == 0 || rD == rB)) + return false; // invalid instruction form + IMLReg regA = rA != 0 ? 
_GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID; + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); + if (updateAddrReg) { - return false; + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regA, regA, regB); + // use single register addressing + regB = regA; + regA = IMLREG_INVALID; } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // add rB to rA - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); - // load word - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 32, false, true); - return true; -} - -bool PPCRecompilerImlGen_LWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = 0; - if( rA ) - gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (destinationRegister == PPC_REC_INVALID_REGISTER) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register - // load word - if( rA ) - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, false); + if(regA != IMLREG_INVALID) + PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian); else - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterB, 0, 32, false, false); + ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian); return true; } -bool PPCRecompilerImlGen_LHAX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg) { - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) + int rA, rD; + uint32 imm; + PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); + IMLReg regA; + if (rA != 0) { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; + regA = _GetRegGPR(ppcImlGenContext, rA); } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // check if destination register is 
already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half word - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, true, true); - return true; -} - -bool PPCRecompilerImlGen_LHAUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) + else { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; + if (updateAddrReg) + return false; // invalid instruction form + regA = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, 0); } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // add rB to rA - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); - // load half word - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 16, true, true); - return true; -} - -bool PPCRecompilerImlGen_LHZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + if (updateAddrReg) { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; + if (regD == regA) + { + // make sure to keep source data intact + regD = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regD, regA); + } + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regA, regA, (sint32)imm); + imm = 0; } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half word - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, 
destinationRegister, gprRegisterA, gprRegisterB, 16, false, true); + ppcImlGenContext->emitInst().make_memory_r(regD, regA, (sint32)imm, bitWidth, isBigEndian); return true; } -bool PPCRecompilerImlGen_LHZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg) { - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) + sint32 rA, rS, rB; + PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID; + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regSrc = _GetRegGPR(ppcImlGenContext, rS); + if (updateAddrReg) { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; + if(rA == 0) + return false; // invalid instruction form + if (regSrc == regA) + { + // make sure to keep source data intact + regSrc = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regSrc, regA); + } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regA, regA, regB); + // use single register addressing + regB = regA; + regA = IMLREG_INVALID; } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // add rB to rA - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); - // load hald word - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 16, false, true); - return true; -} - -void PPCRecompilerImlGen_LHBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = rA != 0 ? 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : 0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (destinationRegister == PPC_REC_INVALID_REGISTER) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register - // load half word (little-endian) - if (rA == 0) - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterB, 0, 16, false, false); + if (regA == IMLREG_INVALID) + ppcImlGenContext->emitInst().make_memory_r(regSrc, regB, 0, bitWidth, isBigEndian); else - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, false, false); -} - -bool PPCRecompilerImlGen_LBZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - // special case where rA is ignored and only rB is used - return false; - } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 8, false, true); - return true; -} - -bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if (rA == 0) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; - } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (destinationRegister == PPC_REC_INVALID_REGISTER) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register - // add rB to rA - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); - // load byte - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegisterA, 0, 8, false, true); + PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, regSrc, regA, regB, bitWidth, false, isBigEndian); return true; } @@ -1591,225 +1289,18 @@ void PPCRecompilerImlGen_LMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode 
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); cemu_assert_debug(rA != 0); sint32 index = 0; - while( rD <= 31 ) - { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register + while (rD <= 31) + { + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); // load word - ppcImlGenContext->emitInst().make_r_memory(destinationRegister, gprRegister, imm+index*4, 32, false, true); + ppcImlGenContext->emitInst().make_r_memory(regD, regA, (sint32)imm + index * 4, 32, false, true); // next rD++; index++; } } -void PPCRecompilerImlGen_STW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - // note: Darksiders 2 has this instruction form but it is never executed. - //ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister - // store word - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm, 32, true); -} - -void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // store&update instructions where rD==rA store the register contents without added imm, therefore we need to handle it differently - // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister - // add imm to memory register early if possible - if( rD != rA ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // store word - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true); - // add imm to memory register late if we couldn't do it early - if( rD == rA ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); -} - -void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - 
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister - // load half - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm, 16, true); -} - -void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister - // add imm to memory register early if possible - if( rD != rA ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // store word - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true); - // add imm to memory register late if we couldn't do it early - if( rD == rA ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); -} - -void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rS; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // can be the same as gprRegister - // store byte - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm, 8, true); -} - -void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); // can be the same as gprRegister - // add imm to memory register early if possible - if( rD != rA ) - 
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // store byte - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); - // add imm to memory register late if we couldn't do it early - if( rD == rA ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); -} - -// generic indexed store (STWX, STHX, STBX, STWUX. If byteReversed == true -> STHBRX) -bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 storeBitWidth, bool byteReversed = false) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA; - if(rA != 0) - gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if (rA == 0) - { - ppcImlGenContext->emitInst().make_memory_r(destinationRegister, gprRegisterB, 0, storeBitWidth, !byteReversed); - } - else - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, !byteReversed); - return true; -} - -bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 storeBitWidth) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - if( rA == 0 ) - { - // not supported - return false; - } - if( rS == rA || rS == rB ) - { - // prepare registers - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, true); - // update EA after store - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); - return true; - } - // prepare registers - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 sourceRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // update EA - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterA, gprRegisterB); - // store word - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegisterA, 0, storeBitWidth, true); - return true; -} - -bool PPCRecompilerImlGen_STWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, 
false); - else - ppcImlGenContext->emitInst().make_memory_r(destinationRegister, gprRegisterB, 0, 32, false); - return true; -} - void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rS, rA; @@ -1819,12 +1310,10 @@ void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 index = 0; while( rS <= 31 ) { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // can be the same as gprRegister + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); // store word - ppcImlGenContext->emitInst().make_memory_r(sourceRegister, gprRegister, imm+index*4, 32, true); + ppcImlGenContext->emitInst().make_memory_r(regS, regA, (sint32)imm + index * 4, 32, true); // next rS++; index++; @@ -1914,7 +1403,7 @@ bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); - IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regD = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); // calculate EA @@ -1956,7 +1445,7 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); // compare calculated EA with reservation - IMLReg regTmpBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + IMLReg regTmpBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); ppcImlGenContext->emitInst().make_compare(regCalcEA, regMemResEA, regTmpBool, IMLCondition::EQ); ppcImlGenContext->emitInst().make_conditional_jump(regTmpBool, true); @@ -2013,7 +1502,7 @@ bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( rA != rS ) // check if no-op { sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); } if ((opcode & PPC_OPC_RC)) @@ -2027,7 +1516,7 @@ bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) // rA = rS | rA sint32 gprSource1Reg = _GetRegGPR(ppcImlGenContext, rS); sint32 gprSource2Reg = _GetRegGPR(ppcImlGenContext, rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) { // make sure we don't overwrite rS or rA @@ -2060,7 +1549,7 @@ bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, 
uint32 opcode { // simple register copy with NOT sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); if (gprDestReg != gprSourceReg) ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); @@ -2072,7 +1561,7 @@ bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode // rA = rS | rA sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); if (gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg) { // make sure we don't overwrite rS or rA @@ -2132,7 +1621,7 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( rA != rS ) // check if no-op { sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); @@ -2147,7 +1636,7 @@ bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode // rA = rS & rA sint32 gprSource1Reg = _GetRegGPR(ppcImlGenContext, rS); sint32 gprSource2Reg = _GetRegGPR(ppcImlGenContext, rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) { // make sure we don't overwrite rS or rA @@ -2187,7 +1676,7 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( rS == rB ) { // result is always 0 -> replace with XOR rA,rA - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); @@ -2209,7 +1698,7 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // a & (~b) is the same as ~((~a) | b) sint32 gprDestReg = _GetRegGPR(ppcImlGenContext, rA); sint32 gprRB = _GetRegGPR(ppcImlGenContext, rB); - sint32 gprRS = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + sint32 gprRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // move rS to rA (if required) if( gprDestReg != gprRS ) { @@ -2234,7 +1723,7 @@ void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); @@ -2250,7 +1739,7 @@ void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); IMLReg gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // rA = rS if( gprDestReg != gprSourceReg ) ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); @@ -2361,7 +1850,7 @@ bool PPCRecompilerImlGen_CRORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); - IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regTmp); return true; @@ -2385,7 +1874,7 @@ bool PPCRecompilerImlGen_CRANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); - IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regTmp); return true; @@ -2419,7 +1908,7 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 1); return true; } - IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regTmp); return true; @@ -2847,9 +2336,6 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_TW(ppcImlGenContext, opcode); break; case 8: - // todo: Check if we can optimize this pattern: - // SUBFC + SUBFE - // SUBFC if (PPCRecompilerImlGen_SUBFC(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; @@ -2869,8 +2355,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_LWARX(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 23: - if (PPCRecompilerImlGen_LWZX(ppcImlGenContext, opcode) == false) + case 23: // LWZX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false)) unsupportedInstructionFound = true; break; case 24: @@ -2895,8 +2381,8 @@ bool 
PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 54: // DBCST - Generates no code break; - case 55: - if (PPCRecompilerImlGen_LWZUX(ppcImlGenContext, opcode) == false) + case 55: // LWZUX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true)) unsupportedInstructionFound = true; break; case 60: @@ -2910,16 +2396,16 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 86: // DCBF -> No-Op break; - case 87: - if (PPCRecompilerImlGen_LBZX(ppcImlGenContext, opcode) == false) + case 87: // LBZX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false)) unsupportedInstructionFound = true; break; case 104: if (PPCRecompilerImlGen_NEG(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 119: - if (PPCRecompilerImlGen_LBZUX(ppcImlGenContext, opcode) == false) + case 119: // LBZUX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true)) unsupportedInstructionFound = true; break; case 124: @@ -2938,16 +2424,16 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_MTCRF(ppcImlGenContext, opcode); break; case 150: - if (PPCRecompilerImlGen_STWCX(ppcImlGenContext, opcode) == false) + if (!PPCRecompilerImlGen_STWCX(ppcImlGenContext, opcode)) unsupportedInstructionFound = true; break; - case 151: - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32) == false) + case 151: // STWX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, true, false)) unsupportedInstructionFound = true; break; - case 183: - if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 32) == false) - unsupportedInstructionFound = true; + case 183: // STWUX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, true, true)) + unsupportedInstructionFound = true; break; case 200: if (PPCRecompilerImlGen_SUBFZE(ppcImlGenContext, opcode) == false) @@ -2957,8 +2443,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_ADDZE(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 215: - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8) == false) + case 215: // STBX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8, true, false)) unsupportedInstructionFound = true; break; case 234: @@ -2969,23 +2455,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_MULLW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 247: - if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 8) == false) + case 247: // STBUX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8, true, true)) unsupportedInstructionFound = true; break; case 266: if (PPCRecompilerImlGen_ADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 279: - if (PPCRecompilerImlGen_LHZX(ppcImlGenContext, opcode) == false) + case 279: // LHZX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false)) unsupportedInstructionFound = true; break; case 284: PPCRecompilerImlGen_EQV(ppcImlGenContext, opcode); break; - case 311: - if (PPCRecompilerImlGen_LHZUX(ppcImlGenContext, opcode) == false) + case 311: // LHZUX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true)) 
unsupportedInstructionFound = true; break; case 316: @@ -2996,28 +2482,28 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_MFSPR(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 343: - if (PPCRecompilerImlGen_LHAX(ppcImlGenContext, opcode) == false) + case 343: // LHAX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false)) unsupportedInstructionFound = true; break; case 371: if (PPCRecompilerImlGen_MFTB(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 375: - if (PPCRecompilerImlGen_LHAUX(ppcImlGenContext, opcode) == false) + case 375: // LHAUX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true)) unsupportedInstructionFound = true; break; - case 407: - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16) == false) + case 407: // STHX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, false)) unsupportedInstructionFound = true; break; case 412: if (PPCRecompilerImlGen_ORC(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 439: - if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 16) == false) + case 439: // STHUX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, true)) unsupportedInstructionFound = true; break; case 444: @@ -3035,10 +2521,9 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_DIVW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 534: - if (PPCRecompilerImlGen_LWBRX(ppcImlGenContext, opcode) == false) + case 534: // LWBRX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false)) unsupportedInstructionFound = true; - ppcImlGenContext->hasFPUInstruction = true; break; case 535: if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false) @@ -3071,8 +2556,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 662: - if (PPCRecompilerImlGen_STWBRX(ppcImlGenContext, opcode) == false) + case 662: // STWBRX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, false, false)) unsupportedInstructionFound = true; break; case 663: @@ -3091,8 +2576,9 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_STFDX(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 790: - PPCRecompilerImlGen_LHBRX(ppcImlGenContext, opcode); + case 790: // LHBRX + if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false)) + unsupportedInstructionFound = true; break; case 792: if (PPCRecompilerImlGen_SRAW(ppcImlGenContext, opcode) == false) @@ -3103,7 +2589,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 918: // STHBRX - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true) == false) + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, false, true)) unsupportedInstructionFound = true; break; case 922: @@ -3127,47 +2613,61 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) break; } break; - case 32: - PPCRecompilerImlGen_LWZ(ppcImlGenContext, opcode); + case 32: // LWZ + 
if(!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 32, false, true, false)) + unsupportedInstructionFound = true; break; - case 33: - PPCRecompilerImlGen_LWZU(ppcImlGenContext, opcode); + case 33: // LWZU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 32, false, true, true)) + unsupportedInstructionFound = true; break; - case 34: - PPCRecompilerImlGen_LBZ(ppcImlGenContext, opcode); + case 34: // LBZ + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 8, false, true, false)) + unsupportedInstructionFound = true; break; - case 35: - PPCRecompilerImlGen_LBZU(ppcImlGenContext, opcode); + case 35: // LBZU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 8, false, true, true)) + unsupportedInstructionFound = true; break; - case 36: - PPCRecompilerImlGen_STW(ppcImlGenContext, opcode); + case 36: // STW + if(!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 32, true, false)) + unsupportedInstructionFound = true; break; - case 37: - PPCRecompilerImlGen_STWU(ppcImlGenContext, opcode); + case 37: // STWU + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 32, true, true)) + unsupportedInstructionFound = true; break; - case 38: - PPCRecompilerImlGen_STB(ppcImlGenContext, opcode); + case 38: // STB + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 8, true, false)) + unsupportedInstructionFound = true; break; - case 39: - PPCRecompilerImlGen_STBU(ppcImlGenContext, opcode); + case 39: // STBU + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 8, true, true)) + unsupportedInstructionFound = true; break; - case 40: - PPCRecompilerImlGen_LHZ(ppcImlGenContext, opcode); + case 40: // LHZ + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, false, true, false)) + unsupportedInstructionFound = true; break; - case 41: - PPCRecompilerImlGen_LHZU(ppcImlGenContext, opcode); + case 41: // LHZU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, false, true, true)) + unsupportedInstructionFound = true; break; - case 42: - PPCRecompilerImlGen_LHA(ppcImlGenContext, opcode); + case 42: // LHA + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, true, true, false)) + unsupportedInstructionFound = true; break; - case 43: - PPCRecompilerImlGen_LHAU(ppcImlGenContext, opcode); + case 43: // LHAU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, true, true, true)) + unsupportedInstructionFound = true; break; - case 44: - PPCRecompilerImlGen_STH(ppcImlGenContext, opcode); + case 44: // STH + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 16, true, false)) + unsupportedInstructionFound = true; break; - case 45: - PPCRecompilerImlGen_STHU(ppcImlGenContext, opcode); + case 45: // STHU + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 16, true, true)) + unsupportedInstructionFound = true; break; case 46: PPCRecompilerImlGen_LMW(ppcImlGenContext, opcode); From 3ba9460dc1b229a2cbe63c26c100c6d1088d6027 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 5 Jan 2023 05:34:36 +0100 Subject: [PATCH 30/64] PPCRec: Use IMLReg in more places, unify and simplify var names --- .../Recompiler/BackendX64/BackendX64.cpp | 202 ++++----- .../Recompiler/BackendX64/BackendX64.h | 8 - .../Recompiler/BackendX64/BackendX64FPU.cpp | 202 ++++----- .../Recompiler/BackendX64/BackendX64Gen.cpp | 107 ----- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 1 - .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 2 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 46 +- .../Recompiler/IML/IMLInstruction.cpp | 276 
++++++------ .../Espresso/Recompiler/IML/IMLInstruction.h | 199 ++++----- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 416 +++++------------- .../Recompiler/IML/IMLRegisterAllocator.cpp | 8 +- .../Recompiler/PPCRecompilerImlGen.cpp | 2 +- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 23 +- 13 files changed, 575 insertions(+), 917 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 049b7345b..43cdb7982 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -545,54 +545,54 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) { // registerResult = registerA - if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) { - if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // if movbe is available we can move and swap in a single instruction? - x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult); + if (imlInstruction->op_r_r.regA != imlInstruction->op_r_r.regR) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); // if movbe is available we can move and swap in a single instruction? 
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.regR); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); } else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) { - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.regR, reg32ToReg16(imlInstruction->op_r_r.regA)); } else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) { if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= registerA - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= registerA - x64Gen_and_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_and_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); } else { // registerResult ^= registerA - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); } } else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) { // copy register content if different registers - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); + if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA ) + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); + x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR); } else if (imlInstruction->operation == PPCREC_IML_OP_NEG) { // copy register content if different registers - if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); + if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA) + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); + x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR); } else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) { @@ -600,29 +600,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) { - x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, 
imlInstruction->op_r_r.regA); } else { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerA, imlInstruction->op_r_r.registerA); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regA, imlInstruction->op_r_r.regA); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); - x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32-1); + x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); + x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR); + x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32-1); sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) + if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA ) { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerResult); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regR); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -631,7 +631,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else { // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -650,25 +650,25 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { - x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_OR ) { - 
x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { - x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { if( (imlInstruction->op_r_immS32.immS32&0x80) ) assert_dbg(); // should not happen - x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32); + x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.regR, (uint8)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { @@ -731,9 +731,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, if (imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it @@ -751,9 +751,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if( rRegOperand1 == rRegOperand2 ) { // result = operand1 - operand1 -> 0 @@ -782,9 +782,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegA = imlInstruction->op_r_r_r.registerA; - sint32 rRegB = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegA = imlInstruction->op_r_r_r.regA; + sint32 rRegB = imlInstruction->op_r_r_r.regB; if (rRegResult == rRegB) std::swap(rRegA, rRegB); @@ -801,9 +801,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand1 * registerOperand2 - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if( (rRegResult == rRegOperand1) || 
(rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it @@ -823,9 +823,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW ) { // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) { @@ -865,9 +865,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; // todo: Use BMI2 rotate if available // check if CL/ECX/RCX is available if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX ) @@ -906,9 +906,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // since our register allocator doesn't support instruction based fixed phys registers yet // we'll instead have to temporarily shuffle registers around - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; // we use BMI2's shift instructions until the RA can assign fixed registers if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) @@ -981,9 +981,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); @@ -1015,9 +1015,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; + sint32 rRegResult = imlInstruction->op_r_r_r.regR; + sint32 
rRegOperand1 = imlInstruction->op_r_r_r.regA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); @@ -1095,9 +1095,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunc bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - auto regR = _reg8(imlInstruction->op_compare.registerResult); - auto regA = _reg32(imlInstruction->op_compare.registerOperandA); - auto regB = _reg32(imlInstruction->op_compare.registerOperandB); + auto regR = _reg8(imlInstruction->op_compare.regR); + auto regA = _reg32(imlInstruction->op_compare.regA); + auto regB = _reg32(imlInstruction->op_compare.regB); X86Cond cond = _x86Cond(imlInstruction->op_compare.cond); x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc x64GenContext->emitter->CMP_dd(regA, regB); @@ -1107,8 +1107,8 @@ bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - auto regR = _reg8(imlInstruction->op_compare_s32.registerResult); - auto regA = _reg32(imlInstruction->op_compare_s32.registerOperandA); + auto regR = _reg8(imlInstruction->op_compare_s32.regR); + auto regA = _reg32(imlInstruction->op_compare_s32.regA); sint32 imm = imlInstruction->op_compare_s32.immS32; X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond); x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc @@ -1119,8 +1119,8 @@ bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunc bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) { - auto regBool = _reg8(imlInstruction->op_conditionalJump2.registerBool); - bool mustBeTrue = imlInstruction->op_conditionalJump2.mustBeTrue; + auto regBool = _reg8(imlInstruction->op_conditional_jump.registerBool); + bool mustBeTrue = imlInstruction->op_conditional_jump.mustBeTrue; x64GenContext->emitter->TEST_bb(regBool, regBool); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64GenContext->emitter->Jcc_j32(mustBeTrue ? 
X86_CONDITION_NZ : X86_CONDITION_Z, 0); @@ -1136,14 +1136,14 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - sint32 regResult = imlInstruction->op_r_r_s32.registerResult; - sint32 regOperand = imlInstruction->op_r_r_s32.registerA; + sint32 regResult = imlInstruction->op_r_r_s32.regR; + sint32 regOperand = imlInstruction->op_r_r_s32.regA; uint32 immS32 = imlInstruction->op_r_r_s32.immS32; if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { - sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; - sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; + sint32 rRegResult = imlInstruction->op_r_r_s32.regR; + sint32 rRegOperand = imlInstruction->op_r_r_s32.regA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; if(regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); @@ -1177,22 +1177,22 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction uint32 sh = (vImm>>16)&0xFF; uint32 mask = ppc_mask(mb, me); // copy rS to temporary register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.regA); // rotate destination register if( sh ) x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); // AND destination register with inverted mask - x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, ~mask); + x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.regR, ~mask); // AND temporary rS register with mask x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask); // OR result with temporary - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, REG_RESV_TEMP); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.regR, REG_RESV_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand * immS32 - sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; - sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; + sint32 rRegResult = imlInstruction->op_r_r_s32.regR; + sint32 rRegOperand = imlInstruction->op_r_r_s32.regA; sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize if( rRegResult != rRegOperand ) @@ -1203,15 +1203,15 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { - if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); + if( imlInstruction->op_r_r_s32.regA != imlInstruction->op_r_r_s32.regR ) + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.regA); if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) - x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); + x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, 
imlInstruction->op_r_r_s32.immS32); else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) - x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); + x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32); else // RIGHT_SHIFT_S - x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); + x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32); } else { @@ -1272,48 +1272,48 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { sint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, memOffset); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); } else if (name == PPCREC_NAME_XER_CA) { - x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } else if (name == PPCREC_NAME_XER_SO) { - x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, 
offsetof(PPCInterpreter_t, xer_so)); } else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) { - x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); } else if (name == PPCREC_NAME_CPU_MEMRES_EA) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); } else if (name == PPCREC_NAME_CPU_MEMRES_VAL) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); } else assert_dbg(); @@ -1324,48 +1324,48 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.regR); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { uint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.regR); else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.regR); else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.regR); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, imlInstruction->op_r_name.regR); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.regR); } else if (name == PPCREC_NAME_XER_CA) { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, 
xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR))); } else if (name == PPCREC_NAME_XER_SO) { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR))); } else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR))); } else if (name == PPCREC_NAME_CPU_MEMRES_EA) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.regR); } else if (name == PPCREC_NAME_CPU_MEMRES_VAL) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.regR); } else assert_dbg(); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 066078cbb..1a0fffec5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -129,9 +129,6 @@ void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstReg void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32); void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32); -void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister); -void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister); - void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); @@ -140,9 +137,6 @@ void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32); -void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); -void 
x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); -void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32); void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister); void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister); @@ -174,9 +168,7 @@ void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_cdq(x64GenContext_t* x64GenContext); -void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister); -void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 473124879..8c591c971 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -35,11 +35,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -52,11 +52,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, 
temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -101,7 +101,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, } // optimized code for ps float load x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memImmS32); - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); + x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP); x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, registerXMM, registerXMM); @@ -343,14 +343,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); // load value x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); + x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); } else { x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); + x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); } @@ -462,7 +462,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); + x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memImmS32); return; } @@ -715,15 +715,15 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction { if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) { - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) { // VPUNPCKHQDQ - if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) + if (imlInstruction->op_fpr_r_r.regR == imlInstruction->op_fpr_r_r.regA) { // unpack top to bottom and top - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } //else if ( hasAVXSupport ) //{ @@ -734,142 +734,142 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction else { // move top to bottom - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, 
imlInstruction->op_fpr_r_r.regA); // duplicate bottom - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerResult); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regR); } } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) { - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) { - if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); + if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA ) + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.regR); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) { - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 2); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA, 2); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) { // use unpckhpd here? 
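// [Editor's note - illustrative sketch, not part of the patch] The two-instruction sequence
// below (SHUFPD regR, regA, 3 followed by _swapPS0PS1) implements
// PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM. Assuming ps0 sits in the low 64-bit lane and ps1 in
// the high lane of the XMM register, as the surrounding code suggests, the net effect is:
struct PairedSingleSketch { double ps0; double ps1; }; // hypothetical type, for illustration only
static void copyTopToBottomSketch(PairedSingleSketch& regR, const PairedSingleSketch& regA)
{
    // SHUFPD regR, regA, 3 : regR low lane <- regR's old high lane, regR high lane <- regA's high lane
    // _swapPS0PS1(regR)    : swap the two lanes back
    // Net effect: regR.ps0 receives regA.ps1 while regR.ps1 keeps its previous value.
    regR.ps0 = regA.ps1;
}
// [End of editor's note] Whether a single UNPCKHPD could replace the pair (the TODO above) is left open here.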
- x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 3); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA, 3); + _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.regR); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) { - x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) { - x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) { - x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) { - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) { - x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.regA); 
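// [Editor's note - illustrative sketch, not part of the patch] The FCTIWZ branch here emits
// CVTTSD2SI into REG_RESV_TEMP, a MOV r32,r32 to clear the upper half, and a MOVQ back into
// the destination XMM register. For in-range inputs the data flow is roughly the following;
// the helper name is hypothetical and only mirrors the emitted instruction sequence:
#include <cstdint>
static uint64_t fctiwzBottomSketch(double ps0)
{
    int32_t truncated = static_cast<int32_t>(ps0); // CVTTSD2SI: truncate toward zero
    return static_cast<uint32_t>(truncated);       // MOV r32,r32 zero-extends to 64 bits
    // MOVQ then places these integer bits in the low lane of the destination FPR (regR).
}
// [End of editor's note] Out-of-range and NaN inputs follow CVTTSD2SI's own saturation rules,
// which this scalar sketch does not model.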
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); // move to FPR register - x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_TEMP); + x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) { // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA); // call assembly routine to calculate accurate FRES result in XMM15 x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // copy result to bottom and top half of result register - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) { // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA); // call assembly routine to calculate accurate FRSQRTE result in XMM15 x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // copy result to bottom of result register - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) { // copy register - if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) + if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } // toggle sign bits - x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair)); + x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) { // copy register - if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) + if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); } // set sign bit to 0 - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair)); + x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_RECDATA, 
offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) { // calculate bottom half of result - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA); if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); else x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); // calculate top half of result // todo - this top to bottom copy can be optimized? - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand, 3); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA, 3); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); // copy bottom to top + x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); // copy bottom to top } else { @@ -884,76 +884,76 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti { if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) { - if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) + if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA) { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } - else if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB) + else if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB) { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); } else { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM) { // todo: Use AVX 3-operand VADDSD if available - if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) + if (imlInstruction->op_fpr_r_r_r.regR == 
imlInstruction->op_fpr_r_r_r.regA) { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } - else if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB) + else if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB) { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); } else { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) { // registerResult = registerOperandA - registerOperandB - if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) + if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA ) { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } else if (g_CPUFeatures.x86.avx) { - x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA, imlInstruction->op_fpr_r_r_r.regB); } - else if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB ) + else if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandB); - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, REG_RESV_FPR_TEMP); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, REG_RESV_FPR_TEMP); } else { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } } else if( 
imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) + if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA ) { - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } - else if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB ) + else if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB ) { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandB); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, REG_RESV_FPR_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, REG_RESV_FPR_TEMP); } else { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); } } else @@ -970,27 +970,27 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc // todo: Investigate if there are other optimizations possible if the operand registers overlap // generic case // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA); // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandB); + x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regB); // 3) Interleave top of frTemp and frC - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandC); + x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regC); // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, REG_RESV_FPR_TEMP); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, REG_RESV_FPR_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) { // todo: Investigate if there are other optimizations possible if the operand registers overlap // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, 
imlInstruction->op_fpr_r_r_r_r.registerOperandA); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA); // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandB); + x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regB); // 3) Copy bottom from frC - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandC); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regC); //// 4) Swap bottom and top half //x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, REG_RESV_FPR_TEMP); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, REG_RESV_FPR_TEMP); //float s0 = (float)hCPU->fpr[frC].fp0; //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); @@ -999,48 +999,48 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) { - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); + x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC); sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB); // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) { // select bottom - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); + x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C bottom - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, 
imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC); sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B bottom PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex()); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB); // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex()); // select top - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); // copy top to bottom (todo: May cause stall?) + x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA); // copy top to bottom (todo: May cause stall?) x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C top //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC, 2); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC, 2); sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B top PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex()); //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB, 2); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB, 2); // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex()); } @@ -1052,36 +1052,36 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, { if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) { - x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); + x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ) { - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); + 
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ) { - x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); + x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ) { // convert to 32bit single - x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); + x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); // convert back to 64bit double - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) { // convert to 32bit singles - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); + x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); // convert back to 64bit doubles - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); + x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) { // convert bottom to 64bit double - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); // copy to top half - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index bc5f5f6ce..618f71421 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -612,40 +612,6 @@ void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRe _x64_op_reg64Low_mem8Reg64(x64GenContext, dstRegister, memRegister64, memImmS32, 0x88); } -void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister) -{ - // LOCK CMPXCHG DWORD [ + + ], (low dword) - x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix - - if( srcRegister >= 8 || memRegisterA64 >= 8|| memRegisterB64 >= 8 ) - x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegisterA64>=8)?1:0)+((memRegisterB64>=8)?2:0)); - - x64Gen_writeU8(x64GenContext, 0x0F); - x64Gen_writeU8(x64GenContext, 0xB1); - - 
_x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32); -} - -void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister) -{ - // LOCK CMPXCHG DWORD [ + ], (low dword) - x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix - - if( srcRegister >= 8 || memRegister64 >= 8 ) - x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegister64>=8)?1:0)); - - x64Gen_writeU8(x64GenContext, 0x0F); - x64Gen_writeU8(x64GenContext, 0xB1); - - if( memImmS32 == 0 ) - { - x64Gen_writeU8(x64GenContext, 0x45+(srcRegister&7)*8); - x64Gen_writeU8(x64GenContext, 0x00); - } - else - assert_dbg(); -} - void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) { // ADD , @@ -812,59 +778,6 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis } } -void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) -{ - // SBB , - if( destRegister >= 8 && srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x45); - else if( srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x44); - else if( destRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0x19); - x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7)); -} - -void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) -{ - // ADC , - if( destRegister >= 8 && srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x45); - else if( srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x44); - else if( destRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0x11); - x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7)); -} - -void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32) -{ - sint32 immS32 = (sint32)immU32; - if( srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41); - if( immS32 >= -128 && immS32 <= 127 ) - { - x64Gen_writeU8(x64GenContext, 0x83); - x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7)); - x64Gen_writeU8(x64GenContext, (uint8)immS32); - } - else - { - if( srcRegister == X86_REG_RAX ) - { - // special EAX short form - x64Gen_writeU8(x64GenContext, 0x15); - } - else - { - x64Gen_writeU8(x64GenContext, 0x81); - x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7)); - } - x64Gen_writeU32(x64GenContext, immU32); - } -} - void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32) { // DEC dword [+imm] @@ -1295,16 +1208,6 @@ void x64Gen_cdq(x64GenContext_t* x64GenContext) x64Gen_writeU8(x64GenContext, 0x99); } -void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister) -{ - if( destRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41|8); - else - x64Gen_writeU8(x64GenContext, 0x40|8); - x64Gen_writeU8(x64GenContext, 0x0F); - x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7)); -} - void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister) { if( destRegister >= 8 ) @@ -1313,16 +1216,6 @@ void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destReg x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7)); } -void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister) -{ - assert_dbg(); // do not use this instruction, it's result is always undefined. 
Instead use ROL , 8 - //x64Gen_writeU8(x64GenContext, 0x66); - //if( destRegister >= 8 ) - // x64Gen_writeU8(x64GenContext, 0x41); - //x64Gen_writeU8(x64GenContext, 0x0F); - //x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7)); -} - void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) { // SSE4 diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 650946f33..fe81f574e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -18,7 +18,6 @@ void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_ // todo - rename bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_removeRedundantCRUpdates(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index d24fec87d..eac363711 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -31,7 +31,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) { if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB) ) { - list_modifiedRegisters.addUnique(instIt.op_r_immS32.registerIndex); + list_modifiedRegisters.addUnique(instIt.op_r_immS32.regR); } } if (list_modifiedRegisters.count > 0) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 2fbf2b6f5..54aa85a35 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -195,9 +195,9 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.registerIndex); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); - strOutput.addFmt("name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name); + strOutput.addFmt("name_{} (", inst.op_r_name.regR, inst.op_r_name.name); if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) { strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); @@ -227,17 +227,17 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerResult); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerA, true); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true); } else if (inst.type == PPCREC_IML_TYPE_R_R_R) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerResult); - 
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerB, true); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true); } else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) { @@ -254,30 +254,30 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.add("CMP "); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandB); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB); strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare.cond)); strOutput.add(" -> "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerResult, true); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true); } else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) { strOutput.add("CMP "); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerOperandA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA); strOutput.addFmt("{}", inst.op_compare_s32.immS32); strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); strOutput.add(" -> "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerResult, true); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true); } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { strOutput.add("CJUMP2 "); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true); - if (!inst.op_conditionalJump2.mustBeTrue) + IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); + if (!inst.op_conditional_jump.mustBeTrue) strOutput.add("(inverted)"); } else if (inst.type == PPCREC_IML_TYPE_JUMP) @@ -290,8 +290,8 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerResult); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA); IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); } else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) @@ -311,7 +311,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.registerIndex); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR); IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); } else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || @@ -391,7 +391,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_FPR_R_NAME) { - strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name); + 
strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.regR, inst.op_r_name.name); if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) { strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); @@ -417,7 +417,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else strOutput.add("ukn"); - strOutput.addFmt(") = fpr_t{}", inst.op_r_name.registerIndex); + strOutput.addFmt(") = fpr_t{}", inst.op_r_name.regR); } else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) { @@ -444,17 +444,17 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); + strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.regR, inst.op_fpr_r_r.regA); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) { strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); + strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.regR, inst.op_fpr_r_r_r_r.regA, inst.op_fpr_r_r_r_r.regB, inst.op_fpr_r_r_r_r.regC); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) { strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); + strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.regR, inst.op_fpr_r_r_r.regA, inst.op_fpr_r_r_r.regB); } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { @@ -462,7 +462,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - strOutput.addFmt("t{} ", inst.op_conditional_r_s32.registerIndex); + strOutput.addFmt("t{} ", inst.op_conditional_r_s32.regR); bool displayAsHex = false; if (inst.operation == PPCREC_IML_OP_ASSIGN) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index eedbb1eb2..4b14de9a1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -6,31 +6,31 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { - registersUsed->readNamedReg1 = -1; - registersUsed->readNamedReg2 = -1; - registersUsed->readNamedReg3 = -1; - registersUsed->writtenNamedReg1 = -1; - registersUsed->writtenNamedReg2 = -1; - registersUsed->readFPR1 = -1; - registersUsed->readFPR2 = -1; - registersUsed->readFPR3 = -1; - registersUsed->readFPR4 = -1; - registersUsed->writtenFPR1 = -1; + registersUsed->readGPR1 = IMLREG_INVALID; + registersUsed->readGPR2 = IMLREG_INVALID; + registersUsed->readGPR3 = IMLREG_INVALID; + registersUsed->writtenGPR1 = IMLREG_INVALID; + registersUsed->writtenGPR2 = IMLREG_INVALID; + registersUsed->readFPR1 = IMLREG_INVALID; + registersUsed->readFPR2 = IMLREG_INVALID; + registersUsed->readFPR3 = IMLREG_INVALID; + registersUsed->readFPR4 = IMLREG_INVALID; + registersUsed->writtenFPR1 = IMLREG_INVALID; if (type == PPCREC_IML_TYPE_R_NAME) { - registersUsed->writtenNamedReg1 = op_r_name.registerIndex; + 
registersUsed->writtenGPR1 = op_r_name.regR; } else if (type == PPCREC_IML_TYPE_NAME_R) { - registersUsed->readNamedReg1 = op_r_name.registerIndex; + registersUsed->readGPR1 = op_r_name.regR; } else if (type == PPCREC_IML_TYPE_R_R) { if (operation == PPCREC_IML_OP_DCBZ) { // both operands are read only - registersUsed->readNamedReg1 = op_r_r.registerResult; - registersUsed->readNamedReg2 = op_r_r.registerA; + registersUsed->readGPR1 = op_r_r.regR; + registersUsed->readGPR2 = op_r_r.regA; } else if ( operation == PPCREC_IML_OP_OR || @@ -38,9 +38,9 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_XOR) { // result is read and written, operand is read - registersUsed->writtenNamedReg1 = op_r_r.registerResult; - registersUsed->readNamedReg1 = op_r_r.registerResult; - registersUsed->readNamedReg2 = op_r_r.registerA; + registersUsed->writtenGPR1 = op_r_r.regR; + registersUsed->readGPR1 = op_r_r.regR; + registersUsed->readGPR2 = op_r_r.regA; } else if ( operation == PPCREC_IML_OP_ASSIGN || @@ -52,8 +52,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32) { // result is written, operand is read - registersUsed->writtenNamedReg1 = op_r_r.registerResult; - registersUsed->readNamedReg1 = op_r_r.registerA; + registersUsed->writtenGPR1 = op_r_r.regR; + registersUsed->readGPR1 = op_r_r.regA; } else cemu_assert_unimplemented(); @@ -63,7 +63,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (operation == PPCREC_IML_OP_MTCRF) { // operand register is read only - registersUsed->readNamedReg1 = op_r_immS32.registerIndex; + registersUsed->readGPR1 = op_r_immS32.regR; } else if (operation == PPCREC_IML_OP_ADD || // deprecated operation == PPCREC_IML_OP_SUB || @@ -73,14 +73,14 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_LEFT_ROTATE) { // operand register is read and write - registersUsed->readNamedReg1 = op_r_immS32.registerIndex; - registersUsed->writtenNamedReg1 = op_r_immS32.registerIndex; + registersUsed->readGPR1 = op_r_immS32.regR; + registersUsed->writtenGPR1 = op_r_immS32.regR; } else { // operand register is write only // todo - use explicit lists, avoid default cases - registersUsed->writtenNamedReg1 = op_r_immS32.registerIndex; + registersUsed->writtenGPR1 = op_r_immS32.regR; } } else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) @@ -88,8 +88,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (operation == PPCREC_IML_OP_ASSIGN) { // result is written, but also considered read (in case the condition fails) - registersUsed->readNamedReg1 = op_conditional_r_s32.registerIndex; - registersUsed->writtenNamedReg1 = op_conditional_r_s32.registerIndex; + registersUsed->readGPR1 = op_conditional_r_s32.regR; + registersUsed->writtenGPR1 = op_conditional_r_s32.regR; } else cemu_assert_unimplemented(); @@ -99,26 +99,26 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (operation == PPCREC_IML_OP_RLWIMI) { // result and operand register are both read, result is written - registersUsed->writtenNamedReg1 = op_r_r_s32.registerResult; - registersUsed->readNamedReg1 = op_r_r_s32.registerResult; - registersUsed->readNamedReg2 = op_r_r_s32.registerA; + registersUsed->writtenGPR1 = op_r_r_s32.regR; + registersUsed->readGPR1 = op_r_r_s32.regR; + registersUsed->readGPR2 = op_r_r_s32.regA; } else { // result is write only 
and operand is read only - registersUsed->writtenNamedReg1 = op_r_r_s32.registerResult; - registersUsed->readNamedReg1 = op_r_r_s32.registerA; + registersUsed->writtenGPR1 = op_r_r_s32.regR; + registersUsed->readGPR1 = op_r_r_s32.regA; } } else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) { - registersUsed->writtenNamedReg1 = op_r_r_s32_carry.regR; - registersUsed->readNamedReg1 = op_r_r_s32_carry.regA; + registersUsed->writtenGPR1 = op_r_r_s32_carry.regR; + registersUsed->readGPR1 = op_r_r_s32_carry.regA; // some operations read carry switch (operation) { case PPCREC_IML_OP_ADD_WITH_CARRY: - registersUsed->readNamedReg2 = op_r_r_s32_carry.regCarry; + registersUsed->readGPR2 = op_r_r_s32_carry.regCarry; break; case PPCREC_IML_OP_ADD: break; @@ -126,25 +126,25 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const cemu_assert_unimplemented(); } // carry is always written - registersUsed->writtenNamedReg2 = op_r_r_s32_carry.regCarry; + registersUsed->writtenGPR2 = op_r_r_s32_carry.regCarry; } else if (type == PPCREC_IML_TYPE_R_R_R) { // in all cases result is written and other operands are read only - registersUsed->writtenNamedReg1 = op_r_r_r.registerResult; - registersUsed->readNamedReg1 = op_r_r_r.registerA; - registersUsed->readNamedReg2 = op_r_r_r.registerB; + registersUsed->writtenGPR1 = op_r_r_r.regR; + registersUsed->readGPR1 = op_r_r_r.regA; + registersUsed->readGPR2 = op_r_r_r.regB; } else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) { - registersUsed->writtenNamedReg1 = op_r_r_r_carry.regR; - registersUsed->readNamedReg1 = op_r_r_r_carry.regA; - registersUsed->readNamedReg2 = op_r_r_r_carry.regB; + registersUsed->writtenGPR1 = op_r_r_r_carry.regR; + registersUsed->readGPR1 = op_r_r_r_carry.regA; + registersUsed->readGPR2 = op_r_r_r_carry.regB; // some operations read carry switch (operation) { case PPCREC_IML_OP_ADD_WITH_CARRY: - registersUsed->readNamedReg3 = op_r_r_r_carry.regCarry; + registersUsed->readGPR3 = op_r_r_r_carry.regCarry; break; case PPCREC_IML_OP_ADD: break; @@ -152,7 +152,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const cemu_assert_unimplemented(); } // carry is always written - registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry; + registersUsed->writtenGPR2 = op_r_r_r_carry.regCarry; } else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { @@ -170,25 +170,25 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (operation == PPCREC_IML_MACRO_B_TO_REG) { - registersUsed->readNamedReg1 = op_macro.param; + registersUsed->readGPR1 = op_macro.param; } else cemu_assert_unimplemented(); } else if (type == PPCREC_IML_TYPE_COMPARE) { - registersUsed->readNamedReg1 = op_compare.registerOperandA; - registersUsed->readNamedReg2 = op_compare.registerOperandB; - registersUsed->writtenNamedReg1 = op_compare.registerResult; + registersUsed->readGPR1 = op_compare.regA; + registersUsed->readGPR2 = op_compare.regB; + registersUsed->writtenGPR1 = op_compare.regR; } else if (type == PPCREC_IML_TYPE_COMPARE_S32) { - registersUsed->readNamedReg1 = op_compare_s32.registerOperandA; - registersUsed->writtenNamedReg1 = op_compare_s32.registerResult; + registersUsed->readGPR1 = op_compare_s32.regA; + registersUsed->writtenGPR1 = op_compare_s32.regR; } else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - registersUsed->readNamedReg1 = op_conditionalJump2.registerBool; + registersUsed->readGPR1 = op_conditional_jump.registerBool; } else if (type == PPCREC_IML_TYPE_JUMP) { @@ -196,48 +196,48 
@@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_LOAD) { - registersUsed->writtenNamedReg1 = op_storeLoad.registerData; + registersUsed->writtenGPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = op_storeLoad.registerMem; + registersUsed->readGPR1 = op_storeLoad.registerMem; } else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) { - registersUsed->writtenNamedReg1 = op_storeLoad.registerData; + registersUsed->writtenGPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = op_storeLoad.registerMem; + registersUsed->readGPR1 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg2 = op_storeLoad.registerMem2; + registersUsed->readGPR2 = op_storeLoad.registerMem2; } else if (type == PPCREC_IML_TYPE_STORE) { - registersUsed->readNamedReg1 = op_storeLoad.registerData; + registersUsed->readGPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg2 = op_storeLoad.registerMem; + registersUsed->readGPR2 = op_storeLoad.registerMem; } else if (type == PPCREC_IML_TYPE_STORE_INDEXED) { - registersUsed->readNamedReg1 = op_storeLoad.registerData; + registersUsed->readGPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg2 = op_storeLoad.registerMem; + registersUsed->readGPR2 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg3 = op_storeLoad.registerMem2; + registersUsed->readGPR3 = op_storeLoad.registerMem2; } else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - registersUsed->readNamedReg1 = op_atomic_compare_store.regEA; - registersUsed->readNamedReg2 = op_atomic_compare_store.regCompareValue; - registersUsed->readNamedReg3 = op_atomic_compare_store.regWriteValue; - registersUsed->writtenNamedReg1 = op_atomic_compare_store.regBoolOut; + registersUsed->readGPR1 = op_atomic_compare_store.regEA; + registersUsed->readGPR2 = op_atomic_compare_store.regCompareValue; + registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue; + registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { // fpr operation - registersUsed->writtenFPR1 = op_r_name.registerIndex; + registersUsed->writtenFPR1 = op_r_name.regR; } else if (type == PPCREC_IML_TYPE_FPR_NAME_R) { // fpr operation - registersUsed->readFPR1 = op_r_name.registerIndex; + registersUsed->readFPR1 = op_r_name.regR; } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { @@ -245,14 +245,14 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->writtenFPR1 = op_storeLoad.registerData; // address is in gpr register if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = op_storeLoad.registerMem; + registersUsed->readGPR1 = op_storeLoad.registerMem; // determine partially written result switch (op_storeLoad.mode) { case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg2 = op_storeLoad.registerGQR; + registersUsed->readGPR2 = op_storeLoad.registerGQR; break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same @@ -282,16 +282,16 @@ void 
IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->writtenFPR1 = op_storeLoad.registerData; // address is in gpr registers if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = op_storeLoad.registerMem; + registersUsed->readGPR1 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg2 = op_storeLoad.registerMem2; + registersUsed->readGPR2 = op_storeLoad.registerMem2; // determine partially written result switch (op_storeLoad.mode) { case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg3 = op_storeLoad.registerGQR; + registersUsed->readGPR3 = op_storeLoad.registerGQR; break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same @@ -319,14 +319,14 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // fpr store operation registersUsed->readFPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = op_storeLoad.registerMem; + registersUsed->readGPR1 = op_storeLoad.registerMem; // PSQ generic stores also access GQR switch (op_storeLoad.mode) { case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg2 = op_storeLoad.registerGQR; + registersUsed->readGPR2 = op_storeLoad.registerGQR; break; default: cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); @@ -339,16 +339,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readFPR1 = op_storeLoad.registerData; // address is in gpr registers if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = op_storeLoad.registerMem; + registersUsed->readGPR1 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg2 = op_storeLoad.registerMem2; + registersUsed->readGPR2 = op_storeLoad.registerMem2; // PSQ generic stores also access GQR switch (op_storeLoad.mode) { case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg3 = op_storeLoad.registerGQR; + registersUsed->readGPR3 = op_storeLoad.registerGQR; break; default: cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); @@ -369,8 +369,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) { // operand read, result written - registersUsed->readFPR1 = op_fpr_r_r.registerOperand; - registersUsed->writtenFPR1 = op_fpr_r_r.registerResult; + registersUsed->readFPR1 = op_fpr_r_r.regA; + registersUsed->writtenFPR1 = op_fpr_r_r.regR; } else if ( operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM || @@ -383,9 +383,9 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const ) { // operand read, result read and (partially) written - registersUsed->readFPR1 = op_fpr_r_r.registerOperand; - registersUsed->readFPR4 = op_fpr_r_r.registerResult; - registersUsed->writtenFPR1 = op_fpr_r_r.registerResult; + registersUsed->readFPR1 = op_fpr_r_r.regA; + registersUsed->readFPR4 = op_fpr_r_r.regR; + registersUsed->writtenFPR1 = 
op_fpr_r_r.regR; } else if (operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM || operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR || @@ -397,9 +397,9 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_SUB_BOTTOM) { // operand read, result read and written - registersUsed->readFPR1 = op_fpr_r_r.registerOperand; - registersUsed->readFPR2 = op_fpr_r_r.registerResult; - registersUsed->writtenFPR1 = op_fpr_r_r.registerResult; + registersUsed->readFPR1 = op_fpr_r_r.regA; + registersUsed->readFPR2 = op_fpr_r_r.regR; + registersUsed->writtenFPR1 = op_fpr_r_r.regR; } else if (operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || @@ -407,8 +407,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM) { // operand read, result read - registersUsed->readFPR1 = op_fpr_r_r.registerOperand; - registersUsed->readFPR2 = op_fpr_r_r.registerResult; + registersUsed->readFPR1 = op_fpr_r_r.regA; + registersUsed->readFPR2 = op_fpr_r_r.regR; } else cemu_assert_unimplemented(); @@ -416,16 +416,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { // fpr operation - registersUsed->readFPR1 = op_fpr_r_r_r.registerOperandA; - registersUsed->readFPR2 = op_fpr_r_r_r.registerOperandB; - registersUsed->writtenFPR1 = op_fpr_r_r_r.registerResult; + registersUsed->readFPR1 = op_fpr_r_r_r.regA; + registersUsed->readFPR2 = op_fpr_r_r_r.regB; + registersUsed->writtenFPR1 = op_fpr_r_r_r.regR; // handle partially written result switch (operation) { case PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM: case PPCREC_IML_OP_FPR_ADD_BOTTOM: case PPCREC_IML_OP_FPR_SUB_BOTTOM: - registersUsed->readFPR4 = op_fpr_r_r_r.registerResult; + registersUsed->readFPR4 = op_fpr_r_r_r.regR; break; case PPCREC_IML_OP_FPR_SUB_PAIR: break; @@ -436,15 +436,15 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { // fpr operation - registersUsed->readFPR1 = op_fpr_r_r_r_r.registerOperandA; - registersUsed->readFPR2 = op_fpr_r_r_r_r.registerOperandB; - registersUsed->readFPR3 = op_fpr_r_r_r_r.registerOperandC; - registersUsed->writtenFPR1 = op_fpr_r_r_r_r.registerResult; + registersUsed->readFPR1 = op_fpr_r_r_r_r.regA; + registersUsed->readFPR2 = op_fpr_r_r_r_r.regB; + registersUsed->readFPR3 = op_fpr_r_r_r_r.regC; + registersUsed->writtenFPR1 = op_fpr_r_r_r_r.regR; // handle partially written result switch (operation) { case PPCREC_IML_OP_FPR_SELECT_BOTTOM: - registersUsed->readFPR4 = op_fpr_r_r_r_r.registerResult; + registersUsed->readFPR4 = op_fpr_r_r_r_r.regR; break; case PPCREC_IML_OP_FPR_SUM0: case PPCREC_IML_OP_FPR_SUM1: @@ -464,15 +464,15 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM || operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR) { - registersUsed->readFPR1 = op_fpr_r.registerResult; - registersUsed->writtenFPR1 = op_fpr_r.registerResult; + registersUsed->readFPR1 = op_fpr_r.regR; + registersUsed->writtenFPR1 = op_fpr_r.regR; } else cemu_assert_unimplemented(); } else if (type == PPCREC_IML_TYPE_FPR_COMPARE) { - registersUsed->writtenNamedReg1 = op_fpr_compare.regR; + registersUsed->writtenGPR1 = op_fpr_compare.regR; registersUsed->readFPR1 = op_fpr_compare.regA; registersUsed->readFPR2 = op_fpr_compare.regB; } @@ -511,29 +511,29 @@ void IMLInstruction::RewriteGPR(const 
std::unordered_map& transl { if (type == PPCREC_IML_TYPE_R_NAME) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable); + op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_NAME_R) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable); + op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_R_R) { - op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, translationTable); - op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, translationTable); + op_r_r.regR = replaceRegisterMultiple(op_r_r.regR, translationTable); + op_r_r.regA = replaceRegisterMultiple(op_r_r.regA, translationTable); } else if (type == PPCREC_IML_TYPE_R_S32) { - op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, translationTable); + op_r_immS32.regR = replaceRegisterMultiple(op_r_immS32.regR, translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, translationTable); + op_conditional_r_s32.regR = replaceRegisterMultiple(op_conditional_r_s32.regR, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_S32) { - op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, translationTable); - op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, translationTable); + op_r_r_s32.regR = replaceRegisterMultiple(op_r_r_s32.regR, translationTable); + op_r_r_s32.regA = replaceRegisterMultiple(op_r_r_s32.regA, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) { @@ -543,9 +543,9 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl } else if (type == PPCREC_IML_TYPE_R_R_R) { - op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, translationTable); - op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, translationTable); - op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, translationTable); + op_r_r_r.regR = replaceRegisterMultiple(op_r_r_r.regR, translationTable); + op_r_r_r.regA = replaceRegisterMultiple(op_r_r_r.regA, translationTable); + op_r_r_r.regB = replaceRegisterMultiple(op_r_r_r.regB, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) { @@ -556,18 +556,18 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl } else if (type == PPCREC_IML_TYPE_COMPARE) { - op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, translationTable); - op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, translationTable); - op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, translationTable); + op_compare.regR = replaceRegisterMultiple(op_compare.regR, translationTable); + op_compare.regA = replaceRegisterMultiple(op_compare.regA, translationTable); + op_compare.regB = replaceRegisterMultiple(op_compare.regB, translationTable); } else if (type == PPCREC_IML_TYPE_COMPARE_S32) { - op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, translationTable); - op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, translationTable); + op_compare_s32.regR = replaceRegisterMultiple(op_compare_s32.regR, translationTable); + op_compare_s32.regA = replaceRegisterMultiple(op_compare_s32.regA, 
translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable); + op_conditional_jump.registerBool = replaceRegisterMultiple(op_conditional_jump.registerBool, translationTable); } else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) { @@ -775,11 +775,11 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_NAME_R) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { @@ -799,25 +799,25 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist } else if (type == PPCREC_IML_TYPE_FPR_R_R) { - op_fpr_r_r.registerResult = replaceRegisterMultiple(op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r.registerOperand = replaceRegisterMultiple(op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regR = replaceRegisterMultiple(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regA = replaceRegisterMultiple(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { - op_fpr_r_r_r.registerResult = replaceRegisterMultiple(op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.registerOperandA = replaceRegisterMultiple(op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.registerOperandB = replaceRegisterMultiple(op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regR = replaceRegisterMultiple(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regA = replaceRegisterMultiple(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regB = replaceRegisterMultiple(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - op_fpr_r_r_r_r.registerResult = replaceRegisterMultiple(op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.registerOperandA = replaceRegisterMultiple(op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.registerOperandB = replaceRegisterMultiple(op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.registerOperandC = replaceRegisterMultiple(op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regR = replaceRegisterMultiple(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regA = replaceRegisterMultiple(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regB = replaceRegisterMultiple(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regC = replaceRegisterMultiple(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R) { - 
op_fpr_r.registerResult = replaceRegisterMultiple(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r.regR = replaceRegisterMultiple(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_COMPARE) { @@ -894,11 +894,11 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { - op_r_name.registerIndex = replaceRegister(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegister(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_NAME_R) { - op_r_name.registerIndex = replaceRegister(op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegister(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { @@ -918,25 +918,25 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe } else if (type == PPCREC_IML_TYPE_FPR_R_R) { - op_fpr_r_r.registerResult = replaceRegister(op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r.registerOperand = replaceRegister(op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regR = replaceRegister(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regA = replaceRegister(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { - op_fpr_r_r_r.registerResult = replaceRegister(op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.registerOperandA = replaceRegister(op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.registerOperandB = replaceRegister(op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regR = replaceRegister(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regA = replaceRegister(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regB = replaceRegister(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - op_fpr_r_r_r_r.registerResult = replaceRegister(op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.registerOperandA = replaceRegister(op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.registerOperandB = replaceRegister(op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.registerOperandC = replaceRegister(op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regR = replaceRegister(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regA = replaceRegister(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regB = replaceRegister(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regC = replaceRegister(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R) { - op_fpr_r.registerResult = replaceRegister(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r.regR = replaceRegister(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index f8e5a646a..feb494b29 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -1,5 +1,10 @@ #pragma once +using IMLReg = uint8; +inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1; + +using IMLName = uint32; + enum { PPCREC_IML_OP_ASSIGN, // '=' operator @@ -65,8 +70,6 @@ enum PPCREC_IML_OP_FPR_SUM1, - - // working towards defining ops per-form // R_R_R only // R_R_S32 only @@ -110,9 +113,6 @@ enum class IMLCondition : uint8 UNSIGNED_GT, UNSIGNED_LT, - SIGNED_OVERFLOW, - SIGNED_NOVERFLOW, - // floating point conditions UNORDERED_GT, // a > b, false if either is NaN UNORDERED_LT, // a < b, false if either is NaN @@ -171,7 +171,7 @@ enum PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r* }; -enum +enum // IMLName { PPCREC_NAME_NONE, PPCREC_NAME_TEMPORARY = 1000, @@ -230,15 +230,16 @@ enum struct IMLUsedRegisters { + // GPR union { struct { - sint16 readNamedReg1; - sint16 readNamedReg2; - sint16 readNamedReg3; - sint16 writtenNamedReg1; - sint16 writtenNamedReg2; + IMLReg readGPR1; + IMLReg readGPR2; + IMLReg readGPR3; + IMLReg writtenGPR1; + IMLReg writtenGPR2; }; }; // FPR @@ -247,59 +248,58 @@ struct IMLUsedRegisters struct { // note: If destination operand is not fully written (PS0 and PS1) it will be added to the read registers - sint16 readFPR1; - sint16 readFPR2; - sint16 readFPR3; - sint16 readFPR4; - sint16 writtenFPR1; + IMLReg readFPR1; + IMLReg readFPR2; + IMLReg readFPR3; + IMLReg readFPR4; + IMLReg writtenFPR1; }; - //sint16 fpr[4]; }; - bool IsRegWritten(sint16 imlReg) const // GPRs + bool IsGPRWritten(IMLReg imlReg) const { - cemu_assert_debug(imlReg >= 0); - return writtenNamedReg1 == imlReg || writtenNamedReg2 == imlReg; + cemu_assert_debug(imlReg != IMLREG_INVALID); + return writtenGPR1 == imlReg || writtenGPR2 == imlReg; } template - void ForEachWrittenGPR(Fn F) + void ForEachWrittenGPR(Fn F) const { - if (writtenNamedReg1 >= 0) - F(writtenNamedReg1); - if (writtenNamedReg2 >= 0) - F(writtenNamedReg2); + if (writtenGPR1 != IMLREG_INVALID) + F(writtenGPR1); + if (writtenGPR2 != IMLREG_INVALID) + F(writtenGPR2); } template - void ForEachReadGPR(Fn F) + void ForEachReadGPR(Fn F) const { - if (readNamedReg1 >= 0) - F(readNamedReg1); - if (readNamedReg2 >= 0) - F(readNamedReg2); - if (readNamedReg3 >= 0) - F(readNamedReg3); + if (readGPR1 != IMLREG_INVALID) + F(readGPR1); + if (readGPR2 != IMLREG_INVALID) + F(readGPR2); + if (readGPR3 != IMLREG_INVALID) + F(readGPR3); } template - void ForEachAccessedGPR(Fn F) + void ForEachAccessedGPR(Fn F) const { - if (readNamedReg1 >= 0) - F(readNamedReg1, false); - if (readNamedReg2 >= 0) - F(readNamedReg2, false); - if (readNamedReg3 >= 0) - F(readNamedReg3, false); - if (writtenNamedReg1 >= 0) - F(writtenNamedReg1, true); - if (writtenNamedReg2 >= 0) - F(writtenNamedReg2, true); + if (readGPR1 != IMLREG_INVALID) + F(readGPR1, false); + if (readGPR2 != IMLREG_INVALID) + F(readGPR2, false); + if (readGPR3 != IMLREG_INVALID) + F(readGPR3, false); + if (writtenGPR1 != IMLREG_INVALID) + F(writtenGPR1, true); + if (writtenGPR2 != IMLREG_INVALID) + F(writtenGPR2, true); } bool HasFPRReg(sint16 imlReg) const { - cemu_assert_debug(imlReg >= 0); + cemu_assert_debug(imlReg != IMLREG_INVALID); if (readFPR1 == imlReg) return true; if (readFPR2 == imlReg) @@ -314,10 +314,6 @@ struct IMLUsedRegisters } }; -using IMLReg = uint8; - -inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1; - struct IMLInstruction { uint8 
type; @@ -330,14 +326,14 @@ struct IMLInstruction }padding; struct { - uint8 registerResult; - uint8 registerA; + IMLReg regR; + IMLReg regA; }op_r_r; struct { - uint8 registerResult; - uint8 registerA; - uint8 registerB; + IMLReg regR; + IMLReg regA; + IMLReg regB; }op_r_r_r; struct { @@ -348,25 +344,25 @@ struct IMLInstruction }op_r_r_r_carry; struct { - uint8 registerResult; - uint8 registerA; + IMLReg regR; + IMLReg regA; sint32 immS32; }op_r_r_s32; struct { IMLReg regR; IMLReg regA; - sint32 immS32; IMLReg regCarry; + sint32 immS32; }op_r_r_s32_carry; struct { - uint8 registerIndex; - uint32 name; + IMLReg regR; + IMLName name; }op_r_name; // alias op_name_r struct { - uint8 registerIndex; + IMLReg regR; sint32 immS32; }op_r_immS32; struct @@ -377,10 +373,10 @@ struct IMLInstruction }op_macro; struct { - uint8 registerData; - uint8 registerMem; - uint8 registerMem2; - uint8 registerGQR; + IMLReg registerData; + IMLReg registerMem; + IMLReg registerMem2; + IMLReg registerGQR; uint8 copyWidth; struct { @@ -393,28 +389,25 @@ struct IMLInstruction }op_storeLoad; struct { - uint8 registerResult; - uint8 registerOperand; - uint8 flags; + IMLReg regR; + IMLReg regA; }op_fpr_r_r; struct { - uint8 registerResult; - uint8 registerOperandA; - uint8 registerOperandB; - uint8 flags; + IMLReg regR; + IMLReg regA; + IMLReg regB; }op_fpr_r_r_r; struct { - uint8 registerResult; - uint8 registerOperandA; - uint8 registerOperandB; - uint8 registerOperandC; - uint8 flags; + IMLReg regR; + IMLReg regA; + IMLReg regB; + IMLReg regC; }op_fpr_r_r_r_r; struct { - uint8 registerResult; + IMLReg regR; }op_fpr_r; struct { @@ -425,41 +418,35 @@ struct IMLInstruction }op_fpr_compare; struct { - uint8 crD; // crBitIndex (result) - uint8 crA; // crBitIndex - uint8 crB; // crBitIndex - }op_cr; - struct - { - uint8 registerResult; // stores the boolean result of the comparison - uint8 registerOperandA; - uint8 registerOperandB; + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; + IMLReg regB; IMLCondition cond; }op_compare; struct { - uint8 registerResult; // stores the boolean result of the comparison - uint8 registerOperandA; + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; sint32 immS32; IMLCondition cond; }op_compare_s32; struct { - uint8 registerBool; + IMLReg registerBool; bool mustBeTrue; - }op_conditionalJump2; + }op_conditional_jump; struct { IMLReg regEA; IMLReg regCompareValue; IMLReg regWriteValue; - IMLReg regBoolOut; // boolean 0/1 + IMLReg regBoolOut; }op_atomic_compare_store; // conditional operations (emitted if supported by target platform) struct { // r_s32 - uint8 registerIndex; + uint8 regR; sint32 immS32; // condition uint8 crRegisterIndex; @@ -514,15 +501,15 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R; this->operation = operation; - this->op_r_r.registerResult = registerResult; - this->op_r_r.registerA = registerA; + this->op_r_r.regR = registerResult; + this->op_r_r.regA = registerA; } void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32) { this->type = PPCREC_IML_TYPE_R_S32; this->operation = operation; - this->op_r_immS32.registerIndex = registerIndex; + this->op_r_immS32.regR = registerIndex; this->op_r_immS32.immS32 = immS32; } @@ -530,9 +517,9 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_R; this->operation = operation; - this->op_r_r_r.registerResult = registerResult; - this->op_r_r_r.registerA = registerA; - this->op_r_r_r.registerB = registerB; + this->op_r_r_r.regR = registerResult; + 
this->op_r_r_r.regA = registerA; + this->op_r_r_r.regB = registerB; } void make_r_r_r_carry(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 registerCarry) @@ -549,8 +536,8 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_S32; this->operation = operation; - this->op_r_r_s32.registerResult = registerResult; - this->op_r_r_s32.registerA = registerA; + this->op_r_r_s32.regR = registerResult; + this->op_r_r_s32.regA = registerA; this->op_r_r_s32.immS32 = immS32; } @@ -568,9 +555,9 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE; this->operation = -999; - this->op_compare.registerResult = registerResult; - this->op_compare.registerOperandA = registerA; - this->op_compare.registerOperandB = registerB; + this->op_compare.regR = registerResult; + this->op_compare.regA = registerA; + this->op_compare.regB = registerB; this->op_compare.cond = cond; } @@ -578,8 +565,8 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE_S32; this->operation = -999; - this->op_compare_s32.registerResult = registerResult; - this->op_compare_s32.registerOperandA = registerA; + this->op_compare_s32.regR = registerResult; + this->op_compare_s32.regA = registerA; this->op_compare_s32.immS32 = immS32; this->op_compare_s32.cond = cond; } @@ -588,8 +575,8 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; this->operation = -999; - this->op_conditionalJump2.registerBool = registerBool; - this->op_conditionalJump2.mustBeTrue = mustBeTrue; + this->op_conditional_jump.registerBool = registerBool; + this->op_conditional_jump.mustBeTrue = mustBeTrue; } void make_jump() diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index a1569d335..7a5ad3798 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,6 +6,15 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" +bool _RegExceedsFPRSpace(IMLReg r) +{ + if (r == IMLREG_INVALID) + return false; + if ((uint32)r >= PPC_X64_FPR_USABLE_REGISTERS) + return true; + return false; +} + bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) { // only xmm0 to xmm14 may be used, xmm15 is reserved @@ -21,7 +30,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) { - if( imlInstructionItr.op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) + if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR)) { imlInstructionItr.make_no_op(); } @@ -39,93 +48,96 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte while( true ) { segIt->imlList[imlIndex].CheckRegisterUsage(®istersUsed); - if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) + if(registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS) { // get index of 
register to replace sint32 fprToReplace = -1; - if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) + if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) fprToReplace = registersUsed.readFPR1; - else if( registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS ) + else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) fprToReplace = registersUsed.readFPR2; - else if (registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS) + else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) fprToReplace = registersUsed.readFPR3; - else if (registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS) + else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) fprToReplace = registersUsed.readFPR4; - else if( registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) + else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) fprToReplace = registersUsed.writtenFPR1; - // generate mask of useable registers - uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 - if( registersUsed.readFPR1 != -1 ) - useableRegisterMask &= ~(1<<(registersUsed.readFPR1)); - if( registersUsed.readFPR2 != -1 ) - useableRegisterMask &= ~(1<<(registersUsed.readFPR2)); - if (registersUsed.readFPR3 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR3)); - if (registersUsed.readFPR4 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR4)); - if( registersUsed.writtenFPR1 != -1 ) - useableRegisterMask &= ~(1<<(registersUsed.writtenFPR1)); - // get highest unused register index (0-6 range) - sint32 unusedRegisterIndex = -1; - for(sint32 f=0; f= 0) { - if( useableRegisterMask&(1<mappedFPRRegister[unusedRegisterIndex]; - bool replacedRegisterIsUsed = true; - if( unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0+32) ) - { - replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; - } - // replace registers that are out of range - segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); - // add load/store name after instruction - PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex+1, 2); - // add load/store before current instruction - PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); - // name_unusedRegister = unusedRegister - IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if (replacedRegisterIsUsed) - { - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - } - else - imlInstructionItr->make_no_op(); - imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - // name_gprToReplace = unusedRegister - imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - // unusedRegister = 
name_unusedRegister - imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if (replacedRegisterIsUsed) - { + if (unusedRegisterIndex == -1) + assert_dbg(); + // determine if the placeholder register is actually used (if not we must not load/store it) + uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; + bool replacedRegisterIsUsed = true; + if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32)) + { + replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0]; + } + // replace registers that are out of range + segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); + // add load/store name after instruction + PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2); + // add load/store before current instruction + PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); + // name_unusedRegister = unusedRegister + IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); + if (replacedRegisterIsUsed) + { + imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; + imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; + imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; + } + else + imlInstructionItr->make_no_op(); + imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; + imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; + // name_gprToReplace = unusedRegister + imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); + imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; + imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; + imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; + // unusedRegister = name_unusedRegister + imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); + memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); + if (replacedRegisterIsUsed) + { + imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; + imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; + imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; + } + else + imlInstructionItr->make_no_op(); } - else - imlInstructionItr->make_no_op(); } else break; @@ -207,9 +219,9 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon if (idxInst.IsSuffixInstruction()) break; idxInst.CheckRegisterUsage(®istersUsed); - sint32 fprMatch[4]; + sint32 fprMatch[4]; // should be IMLReg, but this code is being dropped soon anyway sint32 fprReplace[4]; - fprMatch[0] = -1; + fprMatch[0] = -1; // should be IMLREG_INVALID fprMatch[1] = -1; fprMatch[2] = -1; fprMatch[3] = -1; @@ -233,7 +245,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* 
ppcImlGenCon virtualFpr = registersUsed.readFPR4; else if (f == 4) virtualFpr = registersUsed.writtenFPR1; - if( virtualFpr < 0 ) + if(virtualFpr == IMLREG_INVALID) continue; cemu_assert_debug(virtualFpr < 64); // check if this virtual FPR is already loaded in any real register @@ -257,7 +269,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.registerIndex = (uint8)(unloadRegMapping - rCtx.currentMapping); + imlInstructionTemp->op_r_name.regR = (uint8)(unloadRegMapping - rCtx.currentMapping); imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; idx++; // update mapping @@ -273,7 +285,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.registerIndex = (uint8)(regMapping-rCtx.currentMapping); + imlInstructionTemp->op_r_name.regR = (uint8)(regMapping-rCtx.currentMapping); imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr]; idx++; // update mapping @@ -333,7 +345,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.registerIndex = i; + imlInstructionTemp->op_r_name.regR = i; imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; idx++; } @@ -357,15 +369,15 @@ bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) */ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; + IMLReg registerIndex = nameStoreInstruction->op_r_name.regR; for(size_t i=startIndex; iimlList.size(); i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; IMLUsedRegisters registersUsed; imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.readNamedReg1 == registerIndex || registersUsed.readNamedReg2 == registerIndex || registersUsed.readNamedReg3 == registerIndex ) + if( registersUsed.readGPR1 == registerIndex || registersUsed.readGPR2 == registerIndex || registersUsed.readGPR3 == registerIndex ) return false; - if (registersUsed.IsRegWritten(registerIndex)) + if (registersUsed.IsGPRWritten(registerIndex)) return true; } // todo: Scan next segment(s) @@ -377,7 +389,7 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG */ bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; + sint16 registerIndex = nameStoreInstruction->op_r_name.regR; for(size_t i=startIndex; iimlList.size(); i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; @@ -397,13 +409,13 @@ bool 
PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcI */ bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; + sint16 registerIndex = nameStoreInstruction->op_r_name.regR; for(sint32 i=startIndex; i>=0; i--) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; IMLUsedRegisters registersUsed; imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.IsRegWritten(registerIndex) ) + if( registersUsed.IsGPRWritten(registerIndex) ) { if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) return true; @@ -456,7 +468,7 @@ bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcI */ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; + sint16 registerIndex = nameStoreInstruction->op_r_name.regR; for(sint32 i=startIndex; i>=0; i--) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; @@ -473,228 +485,6 @@ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppc return false; } -uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, uint32 currentOverwriteMask, uint32 currentReadMask, uint32 scanDepth) -{ - // is any bit overwritten but not read? - uint32 overwriteMask = imlSegment->crBitsWritten&~imlSegment->crBitsInput; - currentOverwriteMask |= overwriteMask; - // next segment - if( imlSegment->nextSegmentIsUncertain == false && scanDepth < 3 ) - { - uint32 nextSegmentOverwriteMask = 0; - if( imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchNotTaken ) - { - uint32 mask0 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, 0, 0, scanDepth+1); - uint32 mask1 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, 0, scanDepth+1); - nextSegmentOverwriteMask = mask0&mask1; - } - else if( imlSegment->nextSegmentBranchNotTaken) - { - nextSegmentOverwriteMask = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, 0, scanDepth+1); - } - nextSegmentOverwriteMask &= ~imlSegment->crBitsRead; - currentOverwriteMask |= nextSegmentOverwriteMask; - } - else if (imlSegment->nextSegmentIsUncertain) - { - if (ppcImlGenContext->segmentList2.size() >= 5) - { - return 7; // for more complex functions we assume that CR is not passed on (hack) - } - } - return currentOverwriteMask; -} - -/* - * Returns a mask of all CR bits that are overwritten (written but not read) in the segment and all it's following segments - * If the write state of a CR bit cannot be determined, it is returned as 0 (not overwritten) - */ -uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) -{ - __debugbreak(); // deprecated - - if (imlSegment->nextSegmentIsUncertain) - { - return 0; - } - if( imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchNotTaken ) - { - uint32 mask0 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, 0, 0, 0); - uint32 mask1 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, 
imlSegment->nextSegmentBranchNotTaken, 0, 0, 0); - return mask0&mask1; // only return bits that are overwritten in both branches - } - else if( imlSegment->nextSegmentBranchNotTaken ) - { - uint32 mask = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, 0, 0); - return mask; - } - else - { - // not implemented - } - return 0; -} - -void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) -{ - __debugbreak(); // deprecated - - //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - //{ - // for(IMLInstruction& instIt : segIt->imlList) - // { - // if (instIt.type == PPCREC_IML_TYPE_CJUMP) - // { - // if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - // { - // uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); - // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - // segIt->crBitsRead |= (crBitFlag); - // } - // } - // else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - // { - // uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); - // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - // segIt->crBitsRead |= (crBitFlag); - // } - // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) - // { - // segIt->crBitsRead |= 0xFFFFFFFF; - // } - // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) - // { - // segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); - // } - // else if( instIt.type == PPCREC_IML_TYPE_CR ) - // { - // if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || - // instIt.operation == PPCREC_IML_OP_CR_SET) - // { - // uint32 crBitFlag = 1 << (instIt.op_cr.crD); - // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - // } - // else if (instIt.operation == PPCREC_IML_OP_CR_OR || - // instIt.operation == PPCREC_IML_OP_CR_ORC || - // instIt.operation == PPCREC_IML_OP_CR_AND || - // instIt.operation == PPCREC_IML_OP_CR_ANDC) - // { - // uint32 crBitFlag = 1 << (instIt.op_cr.crD); - // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - // crBitFlag = 1 << (instIt.op_cr.crA); - // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - // crBitFlag = 1 << (instIt.op_cr.crB); - // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - // } - // else - // cemu_assert_unimplemented(); - // } - // else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - // { - // segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); - // } - // else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) - // { - // // overwrites CR0 - // segIt->crBitsWritten |= (0xF<<0); - // } - // } - //} - //// flag instructions that write to CR where we can ignore individual CR bits - //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - //{ - // for (IMLInstruction& instIt : segIt->imlList) - // { - // if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - // { - // uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); - // uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); - // uint32 crIgnoreMask = crOverwriteMask & 
~segIt->crBitsRead; - // instIt.crIgnoreMask = crIgnoreMask; - // } - // } - //} -} - -//bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) -//{ -// IMLUsedRegisters registersUsed; -// for (sint32 i = startIndex; i <= endIndex; i++) -// { -// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; -// imlInstruction->CheckRegisterUsage(®istersUsed); -// if (registersUsed.IsRegWritten(vreg)) -// return true; -// } -// return false; -//} - -//sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name) -//{ -// // current segment -// sint32 currentIndex = startIndex; -// IMLSegment* currentSegment = startSegment; -// sint32 segmentIterateCount = 0; -// sint32 foundRegister = -1; -// while (true) -// { -// // stop scanning if segment is enterable -// if (currentSegment->isEnterable) -// return -1; -// while (currentIndex >= 0) -// { -// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) -// { -// foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; -// break; -// } -// // previous instruction -// currentIndex--; -// } -// if (foundRegister >= 0) -// break; -// // continue at previous segment (if there is only one) -// if (segmentIterateCount >= 1) -// return -1; -// if (currentSegment->list_prevSegments.size() != 1) -// return -1; -// currentSegment = currentSegment->list_prevSegments[0]; -// currentIndex = currentSegment->imlList.size() - 1; -// segmentIterateCount++; -// } -// // scan again to make sure the register is not modified inbetween -// currentIndex = startIndex; -// currentSegment = startSegment; -// segmentIterateCount = 0; -// IMLUsedRegisters registersUsed; -// while (true) -// { -// while (currentIndex >= 0) -// { -// // check if register is modified -// currentSegment->imlList[currentIndex].CheckRegisterUsage(®istersUsed); -// if (registersUsed.IsRegWritten(foundRegister)) -// return -1; -// // check if end of scan reached -// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) -// { -// return foundRegister; -// } -// // previous instruction -// currentIndex--; -// } -// // continue at previous segment (if there is only one) -// if (segmentIterateCount >= 1) -// return -1; -// if (currentSegment->list_prevSegments.size() != 1) -// return -1; -// currentSegment = currentSegment->list_prevSegments[0]; -// currentIndex = currentSegment->imlList.size() - 1; -// segmentIterateCount++; -// } -// return -1; -//} - void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) { IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; @@ -820,13 +610,13 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp } // check if GPR is accessed imlInstruction->CheckRegisterUsage(®istersUsed); - if (registersUsed.readNamedReg1 == gprIndex || - registersUsed.readNamedReg2 == gprIndex || - registersUsed.readNamedReg3 == gprIndex) + if (registersUsed.readGPR1 == gprIndex || + registersUsed.readGPR2 == gprIndex || + registersUsed.readGPR3 == gprIndex) { break; } - if (registersUsed.IsRegWritten(gprIndex)) + if (registersUsed.IsGPRWritten(gprIndex)) return; 
// GPR overwritten, we don't need to byte swap anymore } if (foundMatch) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 98ca687b5..294fb0e3e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -132,7 +132,7 @@ void PPCRecRA_insertGPRLoadInstruction(IMLSegment* imlSegment, sint32 insertInde memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = registerIndex; + imlInstructionItr->op_r_name.regR = registerIndex; imlInstructionItr->op_r_name.name = registerName; } @@ -145,7 +145,7 @@ void PPCRecRA_insertGPRLoadInstructions(IMLSegment* imlSegment, sint32 insertInd IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = (uint8)loadList[i].registerIndex; + imlInstructionItr->op_r_name.regR = (uint8)loadList[i].registerIndex; imlInstructionItr->op_r_name.name = (uint32)loadList[i].registerName; } } @@ -157,7 +157,7 @@ void PPCRecRA_insertGPRStoreInstruction(IMLSegment* imlSegment, sint32 insertInd memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = registerIndex; + imlInstructionItr->op_r_name.regR = registerIndex; imlInstructionItr->op_r_name.name = registerName; } @@ -171,7 +171,7 @@ void PPCRecRA_insertGPRStoreInstructions(IMLSegment* imlSegment, sint32 insertIn memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = (uint8)storeList[i].registerIndex; + imlInstructionItr->op_r_name.regR = (uint8)storeList[i].registerIndex; imlInstructionItr->op_r_name.name = (uint32)storeList[i].registerName; } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 539f075a1..c48a599c4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -62,7 +62,7 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32; imlInstruction->operation = operation; // r_s32 operation - imlInstruction->op_conditional_r_s32.registerIndex = registerIndex; + imlInstruction->op_conditional_r_s32.regR = registerIndex; imlInstruction->op_conditional_r_s32.immS32 = immS32; // condition imlInstruction->op_conditional_r_s32.crRegisterIndex = crRegisterIndex; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index ac0a46bd9..f2f5f0d3c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -70,9 +70,8 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcI IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R; 
imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r.registerResult = registerResult; - imlInstruction->op_fpr_r_r.registerOperand = registerOperand; - imlInstruction->op_fpr_r_r.flags = 0; + imlInstruction->op_fpr_r_r.regR = registerResult; + imlInstruction->op_fpr_r_r.regA = registerOperand; } void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperand1, uint8 registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) @@ -81,10 +80,9 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* pp IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R_R; imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r_r.registerResult = registerResult; - imlInstruction->op_fpr_r_r_r.registerOperandA = registerOperand1; - imlInstruction->op_fpr_r_r_r.registerOperandB = registerOperand2; - imlInstruction->op_fpr_r_r_r.flags = 0; + imlInstruction->op_fpr_r_r_r.regR = registerResult; + imlInstruction->op_fpr_r_r_r.regA = registerOperand1; + imlInstruction->op_fpr_r_r_r.regB = registerOperand2; } void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperandA, uint8 registerOperandB, uint8 registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) @@ -93,11 +91,10 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R_R_R; imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r_r_r.registerResult = registerResult; - imlInstruction->op_fpr_r_r_r_r.registerOperandA = registerOperandA; - imlInstruction->op_fpr_r_r_r_r.registerOperandB = registerOperandB; - imlInstruction->op_fpr_r_r_r_r.registerOperandC = registerOperandC; - imlInstruction->op_fpr_r_r_r_r.flags = 0; + imlInstruction->op_fpr_r_r_r_r.regR = registerResult; + imlInstruction->op_fpr_r_r_r_r.regA = registerOperandA; + imlInstruction->op_fpr_r_r_r_r.regB = registerOperandB; + imlInstruction->op_fpr_r_r_r_r.regC = registerOperandC; } void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister) @@ -107,7 +104,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcIml imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_R; imlInstruction->operation = operation; - imlInstruction->op_fpr_r.registerResult = registerResult; + imlInstruction->op_fpr_r.regR = registerResult; } /* From e86fa57cadaff3de962bbc191ac704aa7f6c3d50 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 5 Jan 2023 07:05:47 +0100 Subject: [PATCH 31/64] PPCRec: Simplify PPC and IML logic instructions Also implement PPC NAND instruction --- .../Recompiler/BackendX64/BackendX64.cpp | 30 -- .../Recompiler/IML/IMLInstruction.cpp | 25 +- .../Recompiler/PPCRecompilerImlGen.cpp | 407 +++++------------- 3 files changed, 117 insertions(+), 345 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 43cdb7982..7fcf5d991 100644 
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -562,24 +562,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.regR, reg32ToReg16(imlInstruction->op_r_r.regA)); } - else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) - { - if( imlInstruction->operation == PPCREC_IML_OP_OR ) - { - // registerResult |= registerA - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_AND ) - { - // registerResult &= registerA - x64Gen_and_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); - } - else - { - // registerResult ^= registerA - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); - } - } else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) { // copy register content if different registers @@ -652,18 +634,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_AND ) - { - x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_OR ) - { - x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) - { - x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); - } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { if( (imlInstruction->op_r_immS32.immS32&0x80) ) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 4b14de9a1..b7ab28e90 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -32,16 +32,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = op_r_r.regR; registersUsed->readGPR2 = op_r_r.regA; } - else if ( - operation == PPCREC_IML_OP_OR || - operation == PPCREC_IML_OP_AND || - operation == PPCREC_IML_OP_XOR) - { - // result is read and written, operand is read - registersUsed->writtenGPR1 = op_r_r.regR; - registersUsed->readGPR1 = op_r_r.regR; - registersUsed->readGPR2 = op_r_r.regA; - } else if ( operation == PPCREC_IML_OP_ASSIGN || operation == PPCREC_IML_OP_ENDIAN_SWAP || @@ -60,17 +50,18 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_R_S32) { + cemu_assert_debug(operation != PPCREC_IML_OP_ADD && + operation != PPCREC_IML_OP_SUB && + operation != PPCREC_IML_OP_AND && + operation != PPCREC_IML_OP_OR && + operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these + if (operation == PPCREC_IML_OP_MTCRF) { // operand register is read only registersUsed->readGPR1 = op_r_immS32.regR; } - else if (operation == PPCREC_IML_OP_ADD || // deprecated - operation == 
PPCREC_IML_OP_SUB || - operation == PPCREC_IML_OP_AND || - operation == PPCREC_IML_OP_OR || - operation == PPCREC_IML_OP_XOR || - operation == PPCREC_IML_OP_LEFT_ROTATE) + else if (operation == PPCREC_IML_OP_LEFT_ROTATE) { // operand register is read and write registersUsed->readGPR1 = op_r_immS32.regR; @@ -87,7 +78,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { if (operation == PPCREC_IML_OP_ASSIGN) { - // result is written, but also considered read (in case the condition fails) + // result is written, but also considered read (in case the condition is false the input is preserved) registersUsed->readGPR1 = op_conditional_r_s32.regR; registersUsed->writtenGPR1 = op_conditional_r_s32.regR; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index c48a599c4..8d8a2cb5b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -982,7 +982,7 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc if (SH != 0) ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH); if (mask != 0xFFFFFFFF) - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerRA, registerRA, (sint32)mask); } if (opcode & PPC_OPC_RC) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1014,7 +1014,7 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); if( mask != 0xFFFFFFFF ) - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, registerRA, (sint32)mask); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerRA, registerRA, (sint32)mask); if (opcode & PPC_OPC_RC) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); return true; @@ -1336,22 +1336,25 @@ bool PPCRecompilerImlGen_LSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // potential optimization: On x86 unaligned access is allowed and we could handle the case nb==4 with a single memory read, and nb==2 with a memory read and shift IMLReg memReg = _GetRegGPR(ppcImlGenContext, rA); - IMLReg tmpReg = _GetRegTemporary(ppcImlGenContext, 0); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); uint32 memOffset = 0; while (nb > 0) { if (rD == rA) return false; cemu_assert(rD < 32); - IMLReg destinationRegister = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); // load bytes one-by-one for (sint32 b = 0; b < 4; b++) { - ppcImlGenContext->emitInst().make_r_memory(tmpReg, memReg, memOffset + b, 8, false, false); + ppcImlGenContext->emitInst().make_r_memory(regTmp, memReg, memOffset + b, 8, false, false); sint32 shiftAmount = (3 - b) * 8; if(shiftAmount) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, tmpReg, tmpReg, shiftAmount); - ppcImlGenContext->emitInst().make_r_r(b == 0 ? 
PPCREC_IML_OP_ASSIGN : PPCREC_IML_OP_OR, destinationRegister, tmpReg); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regTmp, regTmp, shiftAmount); + if(b == 0) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regDst, regTmp); + else + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regDst, regDst, regTmp); nb--; if (nb == 0) break; @@ -1369,23 +1372,23 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( nb == 0 ) nb = 32; - IMLReg memReg = _GetRegGPR(ppcImlGenContext, rA); - IMLReg tmpReg = _GetRegTemporary(ppcImlGenContext, 0); + IMLReg regMem = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); uint32 memOffset = 0; while (nb > 0) { if (rS == rA) return false; cemu_assert(rS < 32); - IMLReg dataRegister = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regSrc = _GetRegGPR(ppcImlGenContext, rS); // store bytes one-by-one for (sint32 b = 0; b < 4; b++) { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpReg, dataRegister); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regTmp, regSrc); sint32 shiftAmount = (3 - b) * 8; if (shiftAmount) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, tmpReg, tmpReg, shiftAmount); - ppcImlGenContext->emitInst().make_memory_r(tmpReg, memReg, memOffset + b, 8, false); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regTmp, regTmp, shiftAmount); + ppcImlGenContext->emitInst().make_memory_r(regTmp, regMem, memOffset + b, 8, false); nb--; if (nb == 0) break; @@ -1491,106 +1494,21 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return true; } -bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_OR_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult) { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // check for MR mnemonic - if( rS == rB ) - { - // simple register copy - if( rA != rS ) // check if no-op - { - sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - } - if ((opcode & PPC_OPC_RC)) - { - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - } - else - { - // rA = rS | rA - sint32 gprSource1Reg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprSource2Reg = _GetRegGPR(ppcImlGenContext, rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - else - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - // rA |= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - } - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - return true; -} - -bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rS, rA, 
rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - //hCPU->gpr[rA] = ~(hCPU->gpr[rS] | hCPU->gpr[rB]); - // check for NOT mnemonic - if (rS == rB) - { - // simple register copy with NOT - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - if (gprDestReg != gprSourceReg) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + if(regS == regB) // check for MR mnemonic + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS); else - { - // rA = rS | rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - if (gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg) - { - // make sure we don't overwrite rS or rA - if (gprSource1Reg == gprDestReg) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - } - else - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); - } - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - else - { - // rA = rS - if (gprDestReg != gprSource1Reg) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA |= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regB); + if(complementResult) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1610,60 +1528,21 @@ bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return true; } -bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_AND_NAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult) { - sint32 rS, rA, rB; + int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // check for MR mnemonic - if( rS == rB ) - { - // simple register copy - if( rA != rS ) // check if no-op - { - sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - else - { - cemu_assert_unimplemented(); // no-op -> verify this case - } - } + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + if (regS == regB) + 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS); else - { - // rA = rS & rA - sint32 gprSource1Reg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprSource2Reg = _GetRegGPR(ppcImlGenContext, rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); - } - else - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource1Reg); - } - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA &= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regB); + if (complementResult) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1671,85 +1550,19 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - //hCPU->gpr[rA] = hCPU->gpr[rS] & ~hCPU->gpr[rB]; - //if (Opcode & PPC_OPC_RC) { - if( rS == rB ) - { - // result is always 0 -> replace with XOR rA,rA - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - else if( rA == rB ) - { - // rB already in rA, therefore we complement rA first and then AND it with rS - sint32 gprRS = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = _GetRegGPR(ppcImlGenContext, rA); - // rA = ~rA - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - // rA &= rS - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_AND, gprDestReg, gprRS); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } - else - { - // a & (~b) is the same as ~((~a) | b) - sint32 gprDestReg = _GetRegGPR(ppcImlGenContext, rA); - sint32 gprRB = _GetRegGPR(ppcImlGenContext, rB); - sint32 gprRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // move rS to rA (if required) - if( gprDestReg != gprRS ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprRS); - } - // rS already in rA, therefore we complement rS first and then OR it with rB - // rA = ~rA - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - // rA |= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_OR, gprDestReg, gprRB); - // rA = ~rA - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); - } + // rA = rS & ~rB; + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + sint32 regA = _GetRegGPR(ppcImlGenContext, rA); + 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regTmp); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } -void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - sint32 gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA &= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, gprDestReg, (sint32)imm); - // ANDI. always sets cr0 - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); -} - -void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - IMLReg gprSourceReg = _GetRegGPR(ppcImlGenContext, rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA &= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, gprDestReg, (sint32)imm); - // ANDIS. always sets cr0 - PPCImlGen_UpdateCR0(ppcImlGenContext, gprDestReg); -} - -bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult) { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); @@ -1764,69 +1577,61 @@ bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regA, regS, regB); } + if (complementResult) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); if (opcode & PPC_OPC_RC) PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } - -bool PPCRecompilerImlGen_EQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +void PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted) { - sint32 rS, rA, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); - if( rS == rB ) + sint32 rS, rA; + uint32 imm; + if (isShifted) { - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, -1); + PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); } else { - // rA = ~(rS ^ rB) - IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); - IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regA, regS, regB); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); + PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); } - if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, regA); - return true; -} - -void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, regA, regS, (sint32)imm); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regS, (sint32)imm); + // 
ANDI/ANDIS always updates cr0 + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); } -void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +void PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted) { sint32 rS, rA; uint32 imm; - PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); + if (isShifted) + { + PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); + } + else + { + PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); + } IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, regA, regS, (sint32)imm); } -void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +void PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted) { sint32 rS, rA; uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); - IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regA, regS, (sint32)imm); -} - -void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); + if (isShifted) + { + PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); + } + else + { + PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); + } IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regA, regS, (sint32)imm); @@ -2308,23 +2113,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_RLWNM(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 24: - PPCRecompilerImlGen_ORI(ppcImlGenContext, opcode); + case 24: // ORI + PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext, opcode, false); break; - case 25: - PPCRecompilerImlGen_ORIS(ppcImlGenContext, opcode); + case 25: // ORIS + PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext, opcode, true); break; - case 26: - PPCRecompilerImlGen_XORI(ppcImlGenContext, opcode); + case 26: // XORI + PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext, opcode, false); break; - case 27: - PPCRecompilerImlGen_XORIS(ppcImlGenContext, opcode); + case 27: // XORIS + PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext, opcode, true); break; - case 28: - PPCRecompilerImlGen_ANDI(ppcImlGenContext, opcode); + case 28: // ANDI + PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext, opcode, false); break; - case 29: - PPCRecompilerImlGen_ANDIS(ppcImlGenContext, opcode); + case 29: // ANDIS + PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext, opcode, true); break; case 31: // opcode category switch (PPC_getBits(opcode, 30, 10)) @@ -2367,8 +2172,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_CNTLZW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 28: - if (PPCRecompilerImlGen_AND(ppcImlGenContext, opcode) == false) + case 28: // AND + if (!PPCRecompilerImlGen_AND_NAND(ppcImlGenContext, opcode, false)) unsupportedInstructionFound = true; break; case 32: @@ -2385,8 +2190,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true)) unsupportedInstructionFound = true; break; - case 60: - if (PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode) == 
false) + case 60: // ANDC + if (!PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode)) unsupportedInstructionFound = true; break; case 75: @@ -2408,8 +2213,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true)) unsupportedInstructionFound = true; break; - case 124: - if (PPCRecompilerImlGen_NOR(ppcImlGenContext, opcode) == false) + case 124: // NOR + if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, true)) unsupportedInstructionFound = true; break; case 136: @@ -2421,7 +2226,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 144: - PPCRecompilerImlGen_MTCRF(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_MTCRF(ppcImlGenContext, opcode)) + unsupportedInstructionFound = true; break; case 150: if (!PPCRecompilerImlGen_STWCX(ppcImlGenContext, opcode)) @@ -2467,15 +2273,16 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false)) unsupportedInstructionFound = true; break; - case 284: - PPCRecompilerImlGen_EQV(ppcImlGenContext, opcode); + case 284: // EQV (alias to NXOR) + if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, true)) + unsupportedInstructionFound = true; break; case 311: // LHZUX if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true)) unsupportedInstructionFound = true; break; - case 316: - if (PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode) == false) + case 316: // XOR + if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, false)) unsupportedInstructionFound = true; break; case 339: @@ -2506,8 +2313,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, true)) unsupportedInstructionFound = true; break; - case 444: - if (PPCRecompilerImlGen_OR(ppcImlGenContext, opcode) == false) + case 444: // OR + if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, false)) unsupportedInstructionFound = true; break; case 459: @@ -2517,6 +2324,10 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_MTSPR(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; + case 476: // NAND + if (!PPCRecompilerImlGen_AND_NAND(ppcImlGenContext, opcode, true)) + unsupportedInstructionFound = true; + break; case 491: if (PPCRecompilerImlGen_DIVW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; From b3676896a9fda3b44191276a8e84a0653c8415e6 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 30 Jan 2023 03:52:43 +0100 Subject: [PATCH 32/64] PPCRec: Unify code + misc RA preparation Whoopsie --- bin/keys.txt | 4 - .../Recompiler/BackendX64/BackendX64.cpp | 4 - .../Recompiler/IML/IMLRegisterAllocator.cpp | 312 ++++++++++-------- .../IML/IMLRegisterAllocatorRanges.cpp | 15 +- .../Recompiler/PPCRecompilerImlGen.cpp | 59 ++-- 5 files changed, 216 insertions(+), 178 deletions(-) delete mode 100644 bin/keys.txt diff --git a/bin/keys.txt b/bin/keys.txt deleted file mode 100644 index 8782dbfe7..000000000 --- a/bin/keys.txt +++ /dev/null @@ -1,4 +0,0 @@ -# this file contains keys needed for decryption of disc file system data (WUD/WUX) -# 1 key per line, any text after a '#' character is considered a comment -# the emulator will 
automatically pick the right key -541b9889519b27d363cd21604b97c67a # example key (can be deleted) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 7fcf5d991..4e045b67a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -737,16 +737,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if ( rRegResult == rRegOperand2 ) { // result = operand1 - result - // NEG result x64Gen_neg_reg64Low32(x64GenContext, rRegResult); - // ADD result, operand1 x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); } else { - // copy operand1 to destination register before doing addition x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // sub operand2 x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 294fb0e3e..c52878b75 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -7,6 +7,8 @@ #include "../BackendX64/BackendX64.h" +#include + struct IMLRegisterAllocatorContext { IMLRegisterAllocatorParameters* raParam; @@ -212,11 +214,67 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe return minDistance; } -typedef struct +struct IMLRALivenessTimeline { - raLivenessSubrange_t* liveRangeList[64]; - sint32 liveRangesCount; -}raLiveRangeInfo_t; +// IMLRALivenessTimeline(raLivenessSubrange_t* subrangeChain) +// { +//#ifdef CEMU_DEBUG_ASSERT +// raLivenessSubrange_t* it = subrangeChain; +// raLivenessSubrange_t* prevIt = it; +// while (it) +// { +// cemu_assert_debug(prevIt->start.index <= it->start.index); +// prevIt = it; +// it = it->link_segmentSubrangesGPR.next; +// } +//#endif +// } + + IMLRALivenessTimeline() + { + } + + // manually add an active range + void AddActiveRange(raLivenessSubrange_t* subrange) + { + activeRanges.emplace_back(subrange); + } + + // remove all ranges from activeRanges with end <= instructionIndex + void ExpireRanges(sint32 instructionIndex) + { + expiredRanges.clear(); + size_t count = activeRanges.size(); + for (size_t f = 0; f < count; f++) + { + raLivenessSubrange_t* liverange = activeRanges[f]; + if (liverange->end.index <= instructionIndex) + { +#ifdef CEMU_DEBUG_ASSERT + if (instructionIndex != RA_INTER_RANGE_END && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken)) + assert_dbg(); // infinite subranges should not expire +#endif + expiredRanges.emplace_back(liverange); + // remove entry + activeRanges[f] = activeRanges[count-1]; + f--; + count--; + } + } + if(count != activeRanges.size()) + activeRanges.resize(count); + } + + std::span GetExpiredRanges() + { + return { expiredRanges.data(), expiredRanges.size() }; + } + + boost::container::small_vector activeRanges; + +private: + boost::container::small_vector expiredRanges; +}; bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB) { @@ -244,10 +302,6 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IML subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; continue; } - - //if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index || - // (subrange->start.index == 
RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || - // (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) if(IsRangeOverlapping(subrange, subrangeItr)) { if (subrangeItr->range->physicalRegister >= 0) @@ -312,84 +366,95 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) #endif } -void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) +{ + uint32 regId = regToSearch & 0xFF; + raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_perVirtualGPR[regId]; + while (subrangeItr) + { + if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex) + return subrangeItr; + subrangeItr = subrangeItr->link_sameVirtualRegisterGPR.next; + } + return nullptr; +} + +void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessSubrange_t* subrange, sint32 instructionIndex) +{ + __debugbreak(); +} + +void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) // algorithm goes as follows: // 1) Iterate all instructions from beginning to end and keep a list of covering ranges - // 2) If we encounter an instruction with a fixed-register we: + // 2) If we encounter an instruction with a fixed register we: // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. RA will then schedule register allocation around that and avoid moves // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction // Q: What if a specific fixed-register is used both for input and output and thus is destructive? A: Create temporary range // Q: What if we have 3 different inputs that are all the same virtual register? A: Create temporary range // Q: Assuming the above is implemented, do we even support overlapping two ranges of separate virtual regs on the same phys register? 
In theory the RA shouldn't care - // assume imlSegment->raInfo.linkedList_allSubranges is ordered ascending by start index already - // todo + // experimental code + //for (size_t i = 0; i < imlSegment->imlList.size(); i++) + //{ + // IMLInstruction& inst = imlSegment->imlList[i]; + // if (inst.type == PPCREC_IML_TYPE_R_R_R) + // { + // if (inst.operation == PPCREC_IML_OP_LEFT_SHIFT) + // { + // // get the virtual reg which needs to be assigned a fixed register + // //IMLUsedRegisters usedReg; + // //inst.CheckRegisterUsage(&usedReg); + // IMLReg rB = inst.op_r_r_r.regB; + // // rB needs to use RCX/ECX + // raLivenessSubrange_t* subrange = _GetSubrangeByInstructionIndexAndVirtualReg(imlSegment, rB, i); + // cemu_assert_debug(subrange->range->physicalRegister < 0); // already has a phys reg assigned + // // make sure RCX/ECX is free + // // split before (if needed) and after instruction so that we get a new 1-instruction long range for which we can assign the physical register + // raLivenessSubrange_t* instructionRange = subrange->start.index < i ? PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrange, i, false) : subrange; + // raLivenessSubrange_t* tailRange = PPCRecRA_splitLocalSubrange(ppcImlGenContext, instructionRange, i+1, false); + + // } + // } + //} } -bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // sort subranges ascending by start index _sortSegmentAllSubrangesLinkedList(imlSegment); - - PPCRecRA_HandleFixedRegisters(ppcImlGenContext, imlSegment); - raLiveRangeInfo_t liveInfo; - liveInfo.liveRangesCount = 0; + IMLRALivenessTimeline livenessTimeline; raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { sint32 currentIndex = subrangeItr->start.index; - // validate subrange PPCRecRA_debugValidateSubrange(subrangeItr); - // expire ranges - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index <= currentIndex && liverange->end.index != RA_INTER_RANGE_END) - { -#ifdef CEMU_DEBUG_ASSERT - if (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken) - assert_dbg(); // infinite subranges should not expire -#endif - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; - } - } - // check if subrange already has register assigned + livenessTimeline.ExpireRanges(std::min(currentIndex, RA_INTER_RANGE_END-1)); // expire up to currentIndex (inclusive), but exclude infinite ranges + // if subrange already has register assigned then add it to the active list and continue if (subrangeItr->range->physicalRegister >= 0) { // verify if register is actually available #ifdef CEMU_DEBUG_ASSERT - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto& liverangeItr : livenessTimeline.activeRanges) { - raLivenessSubrange_t* liverangeItr = liveInfo.liveRangeList[f]; - if (liverangeItr->range->physicalRegister == subrangeItr->range->physicalRegister) - { - // this should never happen because we try to preventively avoid register conflicts - assert_dbg(); - } + // check for register mismatch + cemu_assert_debug(liverangeItr->range->physicalRegister != subrangeItr->range->physicalRegister); } #endif - // add to live ranges - 
liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; - // next + livenessTimeline.AddActiveRange(subrangeItr); subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; continue; } - // find free register for this segment + // find free register for current subrangeItr and segment IMLPhysRegisterSet physRegSet = ctx.raParam->physicalRegisterPool; - - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto& liverangeItr : livenessTimeline.activeRanges) { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - cemu_assert_debug(liverange->range->physicalRegister >= 0); - physRegSet.SetReserved(liverange->range->physicalRegister); + cemu_assert_debug(liverangeItr->range->physicalRegister >= 0); + physRegSet.SetReserved(liverangeItr->range->physicalRegister); } // check intersections with other ranges and determine allowed registers IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments) @@ -449,9 +514,8 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen spillStrategies.localRangeHoleCutting.cost = INT_MAX; if (currentIndex >= 0) { - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto candidate : livenessTimeline.activeRanges) { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; if (candidate->end.index == RA_INTER_RANGE_END) continue; sint32 distance = PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex); @@ -513,12 +577,11 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen spillStrategies.explodeRange.cost = INT_MAX; spillStrategies.explodeRange.range = nullptr; spillStrategies.explodeRange.distance = -1; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto candidate : livenessTimeline.activeRanges) { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; if (candidate->end.index != RA_INTER_RANGE_END) continue; - sint32 distance = PPCRecRA_countInstructionsUntilNextUse(liveInfo.liveRangeList[f], currentIndex); + sint32 distance = PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex); if( distance < 2) continue; sint32 cost; @@ -580,9 +643,8 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen spillStrategies.explodeRange.cost = INT_MAX; spillStrategies.explodeRange.range = nullptr; spillStrategies.explodeRange.distance = -1; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for(auto candidate : livenessTimeline.activeRanges) { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; if (candidate->end.index != RA_INTER_RANGE_END) continue; // only select candidates that clash with current subrange @@ -616,16 +678,14 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen } // assign register to range subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg(); - // add to live ranges - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } return true; } -void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext) { // start with frequently executed segments first sint32 maxLoopDepth = 0; @@ -633,6 +693,10 @@ void 
PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext { maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth); } + // assign fixed registers first + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt); + while (true) { bool done = false; @@ -642,7 +706,7 @@ void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext { if (segIt->loopDepth != d) continue; - done = PPCRecRA_assignSegmentRegisters(ctx, ppcImlGenContext, segIt); + done = IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt); if (done == false) break; } @@ -654,12 +718,12 @@ void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext } } -typedef struct +struct subrangeEndingInfo_t { raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE]; sint32 subrangeCount; bool hasUndefinedEndings; -}subrangeEndingInfo_t; +}; void _findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) { @@ -759,14 +823,13 @@ void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange) } } -void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) virtualReg2PhysReg[i] = -1; std::unordered_map virt2PhysRegMap; // key = virtual register, value = physical register - raLiveRangeInfo_t liveInfo; - liveInfo.liveRangesCount = 0; + IMLRALivenessTimeline livenessTimeline; sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; // load register ranges that are supplied from previous segments @@ -775,8 +838,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, { if (subrangeItr->start.index == RA_INTER_RANGE_START) { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); #ifdef CEMU_DEBUG_ASSERT // load GPR if (subrangeItr->_noLoad == false) @@ -797,41 +859,34 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, while(index < imlSegment->imlList.size() + 1) { // expire ranges - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + livenessTimeline.ExpireRanges(index); + for (auto& expiredRange : livenessTimeline.GetExpiredRanges()) { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index <= index) + // update translation table + if (virtualReg2PhysReg[expiredRange->range->virtualRegister] == -1) + assert_dbg(); + virtualReg2PhysReg[expiredRange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(expiredRange->range->virtualRegister); + // store GPR if required + // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed + if (expiredRange->hasStore) { - // update translation table - if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1) - assert_dbg(); - virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - virt2PhysRegMap.erase(liverange->range->virtualRegister); - // store GPR if required - // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed - if (liverange->hasStore) - { - PPCRecRA_insertGPRStoreInstruction(imlSegment, 
std::min(index, imlSegment->imlList.size() - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name); - index++; - } - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; + PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), expiredRange->range->physicalRegister, expiredRange->range->name); + index++; } } + // load new ranges subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { if (subrangeItr->start.index == index) { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); // load GPR // similar to stores, any loads for the next segment need to happen before the suffix instruction - // however, starting 17-12-2022 ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange) - // the limitation that name loads (for the follow-up segments) need to happen before the suffix instruction require that the range also reflects this, otherwise the RA would erroneously assume registers to be available during the suffix instruction + // however, ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange) + // this is to prevent the RA from inserting store/load instructions after the suffix instruction if (imlSegment->HasSuffixInstruction()) { cemu_assert_debug(subrangeItr->start.index <= imlSegment->GetSuffixInstructionIndex()); @@ -855,35 +910,25 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // next iml instruction index++; } - // expire infinite subranges (subranges that cross the segment border) + // expire infinite subranges (subranges which cross the segment border) sint32 storeLoadListLength = 0; raLoadStoreInfo_t loadStoreList[IML_RA_VIRT_REG_COUNT_MAX]; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + livenessTimeline.ExpireRanges(RA_INTER_RANGE_END); + for (auto liverange : livenessTimeline.GetExpiredRanges()) { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index == RA_INTER_RANGE_END) - { - // update translation table - cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); - virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - virt2PhysRegMap.erase(liverange->range->virtualRegister); - // store GPR - if (liverange->hasStore) - { - loadStoreList[storeLoadListLength].registerIndex = liverange->range->physicalRegister; - loadStoreList[storeLoadListLength].registerName = liverange->range->name; - storeLoadListLength++; - } - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; - } - else + // update translation table + cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); + virtualReg2PhysReg[liverange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(liverange->range->virtualRegister); + // store GPR + if (liverange->hasStore) { - cemu_assert_suspicious(); + loadStoreList[storeLoadListLength].registerIndex = liverange->range->physicalRegister; + loadStoreList[storeLoadListLength].registerName = liverange->range->name; + storeLoadListLength++; } } + cemu_assert_debug(livenessTimeline.activeRanges.empty()); if 
(storeLoadListLength > 0) { PPCRecRA_insertGPRStoreInstructions(imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList, storeLoadListLength); @@ -895,8 +940,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, { if (subrangeItr->start.index == RA_INTER_RANGE_END) { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); // load GPR if (subrangeItr->_noLoad == false) { @@ -918,20 +962,20 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, } } -void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_GenerateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) { for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; - PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment); + IMLRA_GenerateSegmentInstructions(ppcImlGenContext, imlSegment); } } -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext); +void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext); +void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext); +void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) { // insert empty segments after every non-taken branch if the linked segment has more than one input // this gives the register allocator more room to create efficient spill code @@ -986,16 +1030,16 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext IMLRegisterAllocatorContext ctx; ctx.raParam = &raParam; - PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext); + IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); ppcImlGenContext->raInfo.list_ranges = std::vector(); - PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_assignRegisters(ctx, ppcImlGenContext); + IMLRA_CalculateLivenessRanges(ppcImlGenContext); + IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext); + IMLRA_AssignRegisters(ctx, ppcImlGenContext); - PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext); - PPCRecRA_generateMoveInstructions(ppcImlGenContext); + IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); + IMLRA_GenerateMoveInstructions(ppcImlGenContext); PPCRecRA_deleteAllRanges(ppcImlGenContext); } @@ -1028,7 +1072,7 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, } } -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) { // for each register calculate min/max index of usage range within each segment for (IMLSegment* segIt : ppcImlGenContext->segmentList2) @@ -1338,7 +1382,7 @@ void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) } } -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) { // merge close ranges 
PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); @@ -1377,7 +1421,7 @@ void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) subrange->_noLoad = true; } -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) { // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore // first do a per-subrange pass diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 14159c772..071a1d5e8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -225,9 +225,9 @@ void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {} #endif // split subrange at the given index -// After the split there will be two ranges/subranges: -// head -> subrange is shortned to end at splitIndex -// tail -> a new subrange that reaches from splitIndex to the end of the original subrange +// After the split there will be two ranges and subranges: +// head -> subrange is shortened to end at splitIndex (exclusive) +// tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange // if head has a physical register assigned it will not carry over to tail // The return value is the tail subrange // If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations @@ -236,7 +236,9 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC { // validation #ifdef CEMU_DEBUG_ASSERT - if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) + //if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) + // assert_dbg(); + if (subrange->start.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) assert_dbg(); if (subrange->start.index >= splitIndex) assert_dbg(); @@ -282,6 +284,11 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC tailSubrange->start.index = tailSubrange->list_locations.front().index; } } + else + { + // set head range to end at split index + subrange->end.index = splitIndex; + } return tailSubrange; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 8d8a2cb5b..3cd1a1c65 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1023,7 +1023,7 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { // unlike SRAWI, for SRAW the shift range is 0-63 (masked to 6 bits) - // but only shifts up to register bitwidth-1 are well defined in IML so this requires special handling for shifts >= 32 + // but only shifts up to register bitwidth minus one are well defined in IML so this requires special handling for shifts >= 32 sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); @@ -1093,9 +1093,9 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg registerRS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg registerRB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg registerRA = _GetRegGPR(ppcImlGenContext, rA); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1106,13 +1106,12 @@ bool PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, regA, regS, regB); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1120,12 +1119,11 @@ bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - PPC_ASSERT(rB==0); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, regA, regS); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1133,11 +1131,11 @@ bool PPCRecompilerImlGen_EXTSB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, regA, regS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1145,12 +1143,11 @@ bool PPCRecompilerImlGen_CNTLZW(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - PPC_ASSERT(rB==0); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, registerRA, registerRS); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, regA, regS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1158,13 +1155,11 @@ bool PPCRecompilerImlGen_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 registerRD = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, registerRD, registerRA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, regD, regA); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } From 0577effe41b0a5c2c5b558ec7a454559b6932c1a Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 30 Jan 2023 06:01:39 +0100 Subject: [PATCH 33/64] PPCRec: Use IMLReg type in FPR RA --- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 4 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 1 - .../Recompiler/PPCRecompilerIntermediate.cpp | 65 ------------------- 3 files changed, 2 insertions(+), 68 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index dd445b2c9..247cfa8b8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -261,7 +261,7 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) for (sint32 i = 0; i < segIt->imlList.size(); i++) { segIt->imlList[i].CheckRegisterUsage(®istersUsed); - sint32 accessedTempReg[5]; + IMLReg accessedTempReg[5]; // intermediate FPRs accessedTempReg[0] = registersUsed.readFPR1; accessedTempReg[1] = registersUsed.readFPR2; @@ -270,7 +270,7 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) accessedTempReg[4] = registersUsed.writtenFPR1; for (sint32 f = 0; f < 5; f++) { - if (accessedTempReg[f] == -1) + if (accessedTempReg[f] == IMLREG_INVALID) continue; uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]]; if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index d951fb1d0..ad4230015 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -101,6 +101,5 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general -void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index 0a87a1e7b..468af5b2a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -1,71 +1,6 @@ #include "PPCRecompiler.h" #include 
"PPCRecompilerIml.h" -IMLSegment* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) -{ - __debugbreak(); - return nullptr; - //for(IMLSegment* segIt : ppcImlGenContext->segmentList2) - //{ - // if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) - // { - // return segIt; - // } - //} - //debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); - //return nullptr; -} - -void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) -{ - __debugbreak(); // outdated - - //size_t segCount = ppcImlGenContext->segmentList2.size(); - //for(size_t s=0; ssegmentList2[s]; - - // bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); - // IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; - // // handle empty segment - // if( imlSegment->imlList.empty()) - // { - // if (isLastSegment == false) - // IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment - // else - // imlSegment->nextSegmentIsUncertain = true; - // continue; - // } - // // check last instruction of segment - // IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); - // if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - // { - // // find destination segment by ppc jump address - // IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); - // if( jumpDestSegment ) - // { - // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - // IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); - // } - // else - // { - // imlSegment->nextSegmentIsUncertain = true; - // } - // } - // else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - // { - // // currently we assume that the next segment is unknown for all macros - // imlSegment->nextSegmentIsUncertain = true; - // } - // else - // { - // // all other instruction types do not branch - // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - // } - //} -} - void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext) { size_t initialSegmentCount = ppcImlGenContext->segmentList2.size(); From 59bd84bc7714bf416359b622eea64cd210776c22 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 30 Jan 2023 06:30:29 +0100 Subject: [PATCH 34/64] PPCRec: Use agnostic breakpoints --- .../Recompiler/BackendX64/BackendX64.cpp | 6 +- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 1 - .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 60 ------------------- .../Recompiler/IML/IMLRegisterAllocator.cpp | 2 +- .../Recompiler/PPCRecompilerImlGen.cpp | 4 +- 5 files changed, 6 insertions(+), 67 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 4e045b67a..95591c29a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -642,7 +642,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { - __debugbreak(); + DEBUG_BREAK; //uint32 destRegister = 
imlInstruction->op_r_immS32.registerIndex; //x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); //for(sint32 f=0; f<32; f++) @@ -653,7 +653,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) { - __debugbreak(); + DEBUG_BREAK; //uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; //uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); //for (sint32 f = 0; f < 32; f++) @@ -896,7 +896,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, //if (rRegResult == rRegOperand2) //{ // if (rRegResult != rRegOperand1) - // __debugbreak(); // cannot handle yet (we use rRegResult as a temporary reg, but its not possible if it is shared with op2) + // DEBUG_BREAK; // cannot handle yet (we use rRegResult as a temporary reg, but its not possible if it is shared with op2) //} //if(rRegOperand1 != rRegResult) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index fe81f574e..f1820f570 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -12,7 +12,6 @@ struct PPCRecCRTracking_t bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment); bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction); -void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking); // optimizer passes // todo - rename diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index eac363711..ca438c3ce 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -89,63 +89,3 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) return false; } - -void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking) -{ - __debugbreak(); - //crTracking->readCRBits = 0; - //crTracking->writtenCRBits = 0; - //if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) - //{ - // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - // { - // uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - // crTracking->readCRBits = (crBitFlag); - // } - //} - //else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - //{ - // uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - // crTracking->readCRBits = crBitFlag; - //} - //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) - //{ - // crTracking->readCRBits = 0xFFFFFFFF; - //} - //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) - //{ - // crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - //} - //else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - //{ - // if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || - // imlInstruction->operation == PPCREC_IML_OP_CR_SET) - // { - // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - // crTracking->writtenCRBits = crBitFlag; - // } - // else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || - // imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - // imlInstruction->operation == 
PPCREC_IML_OP_CR_AND || - // imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - // { - // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - // crTracking->writtenCRBits = crBitFlag; - // crBitFlag = 1 << (imlInstruction->op_cr.crA); - // crTracking->readCRBits = crBitFlag; - // crBitFlag = 1 << (imlInstruction->op_cr.crB); - // crTracking->readCRBits |= crBitFlag; - // } - // else - // assert_dbg(); - //} - //else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) - //{ - // crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); - //} - //else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - //{ - // // overwrites CR0 - // crTracking->writtenCRBits |= (0xF << 0); - //} -} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index c52878b75..5d11d0c2a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -381,7 +381,7 @@ raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* im void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessSubrange_t* subrange, sint32 instructionIndex) { - __debugbreak(); + DEBUG_BREAK; } void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 3cd1a1c65..c37126d57 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -3079,7 +3079,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction { debug_printf("---------------- SegmentDump (Suffix instruction at wrong pos in segment 0x%x):\n", (int)segIndex); IMLDebug_Dump(&ppcImlGenContext); - __debugbreak(); + DEBUG_BREAK; } } } @@ -3089,7 +3089,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction { debug_printf("---------------- SegmentDump (NoSuffixInstruction in segment 0x%x):\n", (int)segIndex); IMLDebug_Dump(&ppcImlGenContext); - __debugbreak(); + DEBUG_BREAK; } } if (seg->nextSegmentBranchNotTaken) From 154aef0c1b6e56e3339d0e78292d66a5367e4786 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 30 Jan 2023 06:57:48 +0100 Subject: [PATCH 35/64] PPCRec: Fix capitalization in include --- src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 618f71421..efe929d0e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -25,7 +25,7 @@ void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v) x64GenContext->emitter->_emitU64(v); } -#include "x64Emit.hpp" +#include "X64Emit.hpp" void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32) { From df74b997611ec1fed4239f3cc77bbc89cf2cb120 Mon Sep 17 00:00:00 
2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 2 Feb 2023 17:18:36 +0100 Subject: [PATCH 36/64] PPCRec: Initial support for typed registers --- .../Recompiler/BackendX64/BackendX64.cpp | 259 +++++----- .../Recompiler/BackendX64/BackendX64FPU.cpp | 262 +++++----- .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 2 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 48 +- .../Recompiler/IML/IMLInstruction.cpp | 284 +++++----- .../Espresso/Recompiler/IML/IMLInstruction.h | 239 ++++++--- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 179 ++++--- .../Recompiler/IML/IMLRegisterAllocator.cpp | 41 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 4 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 10 +- .../Recompiler/PPCRecompilerImlGen.cpp | 266 +++++----- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 489 +++++++++--------- 12 files changed, 1126 insertions(+), 957 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 95591c29a..7ba3d519f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -8,14 +8,22 @@ #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" -static x86Assembler64::GPR32 _reg32(sint8 physRegId) +static x86Assembler64::GPR32 _reg32(IMLReg physReg) { - return (x86Assembler64::GPR32)physRegId; + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); + return (x86Assembler64::GPR32)physReg.GetRegID(); } -static x86Assembler64::GPR8_REX _reg8(sint8 physRegId) +static uint32 _reg64(IMLReg physReg) { - return (x86Assembler64::GPR8_REX)physRegId; + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64); + return physReg.GetRegID(); +} + +static x86Assembler64::GPR8_REX _reg8(IMLReg physReg) +{ + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); // for now these are represented as 32bit + return (x86Assembler64::GPR8_REX)physReg.GetRegID(); } static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) @@ -28,6 +36,11 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } +static x86Assembler64::GPR8_REX _reg8_from_reg64(uint32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId) { return (x86Assembler64::GPR64)regId; @@ -132,7 +145,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, { if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) { - uint32 branchDstReg = imlInstruction->op_macro.param; + uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg); if(X86_REG_RDX != branchDstReg) x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg); // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX @@ -334,11 +347,16 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, */ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - sint32 realRegisterData = imlInstruction->op_storeLoad.registerData; - sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; - sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; + cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32); + 
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32); + if (indexed) + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32); + + IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID(); + IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID(); + IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID(); if( indexed && realRegisterMem == realRegisterMem2 ) { return false; @@ -439,11 +457,16 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p */ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - sint32 realRegisterData = imlInstruction->op_storeLoad.registerData; - sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; - sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; + cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32); + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32); + if (indexed) + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32); + + IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID(); + IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID(); + IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER; if (indexed) - realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID(); if (indexed && realRegisterMem == realRegisterMem2) { @@ -542,39 +565,42 @@ bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + auto regR = _reg32(imlInstruction->op_r_r.regR); + auto regA = _reg32(imlInstruction->op_r_r.regA); + if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) { // registerResult = registerA - if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA) - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); + if (regR != regA) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) { - if (imlInstruction->op_r_r.regA != imlInstruction->op_r_r.regR) - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); // if movbe is available we can move and swap in a single instruction? - x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.regR); + if (regA != regR) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); // if movbe is available we can move and swap in a single instruction? 
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, regR); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); + x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, regR, regA); } else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) { - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.regR, reg32ToReg16(imlInstruction->op_r_r.regA)); + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, regR, reg32ToReg16(regA)); } else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) { // copy register content if different registers - if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA ) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); - x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR); + if( regR != regA ) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_not_reg64Low32(x64GenContext, regR); } else if (imlInstruction->operation == PPCREC_IML_OP_NEG) { // copy register content if different registers - if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); - x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR); + if (regR != regA) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_neg_reg64Low32(x64GenContext, regR); } else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) { @@ -582,29 +608,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) { - x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); + x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, regR, regA); } else { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regA, imlInstruction->op_r_r.regA); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, regA, regA); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); - x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR); - x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32-1); + x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, regR, regA); + x64Gen_neg_reg64Low32(x64GenContext, regR); + x64Gen_add_reg64Low32_imm32(x64GenContext, regR, 32-1); sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, 32); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { - if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA ) + if( regR != regA ) { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, 
imlInstruction->op_r_r.regR); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regR); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -613,7 +639,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else { // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -630,15 +656,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + auto regR = _reg32(imlInstruction->op_r_immS32.regR); + if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { - if( (imlInstruction->op_r_immS32.immS32&0x80) ) - assert_dbg(); // should not happen - x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.regR, (uint8)imlInstruction->op_r_immS32.immS32); + cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0); + x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { @@ -698,12 +725,13 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR); + auto rRegOperand1 = _reg32(imlInstruction->op_r_r_r.regA); + auto rRegOperand2 = _reg32(imlInstruction->op_r_r_r.regB); + if (imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it @@ -721,9 +749,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if( rRegOperand1 == rRegOperand2 ) { // result = operand1 - operand1 -> 0 @@ -748,28 +773,22 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegA = imlInstruction->op_r_r_r.regA; - 
sint32 rRegB = imlInstruction->op_r_r_r.regB; - if (rRegResult == rRegB) - std::swap(rRegA, rRegB); + if (rRegResult == rRegOperand2) + std::swap(rRegOperand1, rRegOperand2); - if (rRegResult != rRegA) - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegA); + if (rRegResult != rRegOperand1) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); if (imlInstruction->operation == PPCREC_IML_OP_OR) - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); else if (imlInstruction->operation == PPCREC_IML_OP_AND) - x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); + x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); else - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand1 * registerOperand2 - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it @@ -789,9 +808,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW ) { // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) { @@ -831,9 +847,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; // todo: Use BMI2 rotate if available // check if CL/ECX/RCX is available if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX ) @@ -872,10 +885,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, // since our register allocator doesn't support instruction based fixed phys registers yet // we'll instead have to temporarily shuffle registers around - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; - // we use BMI2's shift instructions until the RA can assign fixed registers if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { @@ -947,10 +956,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), 
X86_REG_EDX); // mov operand 2 to temp register @@ -981,10 +986,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { - sint32 rRegResult = imlInstruction->op_r_r_r.regR; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB; - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); // mov operand 2 to temp register @@ -1102,37 +1103,35 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - sint32 regResult = imlInstruction->op_r_r_s32.regR; - sint32 regOperand = imlInstruction->op_r_r_s32.regA; + auto regR = _reg32(imlInstruction->op_r_r_s32.regR); + auto regA = _reg32(imlInstruction->op_r_r_s32.regA); uint32 immS32 = imlInstruction->op_r_r_s32.immS32; if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { - sint32 rRegResult = imlInstruction->op_r_r_s32.regR; - sint32 rRegOperand = imlInstruction->op_r_r_s32.regA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; - if(regResult != regOperand) - x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); - x64Gen_add_reg64Low32_imm32(x64GenContext, regResult, (uint32)immU32); + if(regR != regA) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_add_reg64Low32_imm32(x64GenContext, regR, (uint32)immU32); } else if (imlInstruction->operation == PPCREC_IML_OP_SUB) { - if (regResult != regOperand) - x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); - x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); + if (regR != regA) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_sub_reg64Low32_imm32(x64GenContext, regR, immS32); } else if (imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_XOR) { - if (regResult != regOperand) - x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); + if (regR != regA) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); if (imlInstruction->operation == PPCREC_IML_OP_AND) - x64Gen_and_reg64Low32_imm32(x64GenContext, regResult, immS32); + x64Gen_and_reg64Low32_imm32(x64GenContext, regR, immS32); else if (imlInstruction->operation == PPCREC_IML_OP_OR) - x64Gen_or_reg64Low32_imm32(x64GenContext, regResult, immS32); + x64Gen_or_reg64Low32_imm32(x64GenContext, regR, immS32); else // XOR - x64Gen_xor_reg64Low32_imm32(x64GenContext, regResult, immS32); + x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) { @@ -1143,41 +1142,39 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction uint32 sh = (vImm>>16)&0xFF; uint32 mask = ppc_mask(mb, me); // copy rS to temporary register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.regA); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA); // rotate destination register if( sh ) x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); // AND destination 
register with inverted mask - x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.regR, ~mask); + x64Gen_and_reg64Low32_imm32(x64GenContext, regR, ~mask); // AND temporary rS register with mask x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask); // OR result with temporary - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.regR, REG_RESV_TEMP); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand * immS32 - sint32 rRegResult = imlInstruction->op_r_r_s32.regR; - sint32 rRegOperand = imlInstruction->op_r_r_s32.regA; sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize - if( rRegResult != rRegOperand ) - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP); + if( regR != regA ) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP); } else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { - if( imlInstruction->op_r_r_s32.regA != imlInstruction->op_r_r_s32.regR ) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.regA); + if( regA != regR ) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) - x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32); + x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) - x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32); + x64Gen_shr_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); else // RIGHT_SHIFT_S - x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32); + x64Gen_sar_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); } else { @@ -1236,50 +1233,52 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; + auto regR = _reg64(imlInstruction->op_r_name.regR); + if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { sint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, 
imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); } else if (name == PPCREC_NAME_XER_CA) { - x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } else if (name == PPCREC_NAME_XER_SO) { - x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); } else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) { - x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); } else if (name == PPCREC_NAME_CPU_MEMRES_EA) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); } else if (name == PPCREC_NAME_CPU_MEMRES_VAL) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); } else assert_dbg(); @@ -1288,50 +1287,52 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; + auto regR = _reg64(imlInstruction->op_r_name.regR); + if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), regR); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { uint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == 
SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR); else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR); else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR); } else if (name == PPCREC_NAME_XER_CA) { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR))); + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR)); } else if (name == PPCREC_NAME_XER_SO) { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR))); + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR)); } else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR))); + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR)); } else if (name == PPCREC_NAME_CPU_MEMRES_EA) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR); } else if (name == PPCREC_NAME_CPU_MEMRES_VAL) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR); } else assert_dbg(); @@ -1432,16 +1433,12 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) - { codeGenerationFailed = true; - } } else if (imlInstruction->type == 
PPCREC_IML_TYPE_JUMP) { if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) - { codeGenerationFailed = true; - } } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) { diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 8c591c971..0942842de 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -7,7 +7,14 @@ uint32 _regF64(IMLReg r) { - return (uint32)r; + cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::F64); + return (uint32)r.GetRegID(); +} + +uint32 _regI32(IMLReg r) +{ + cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); + return (uint32)r.GetRegID(); } static x86Assembler64::GPR32 _reg32(sint8 physRegId) @@ -15,9 +22,10 @@ static x86Assembler64::GPR32 _reg32(sint8 physRegId) return (x86Assembler64::GPR32)physRegId; } -static x86Assembler64::GPR8_REX _reg8(sint8 physRegId) +static x86Assembler64::GPR8_REX _reg8(IMLReg r) { - return (x86Assembler64::GPR8_REX)physRegId; + cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); // currently bool regs are implemented as 32bit registers + return (x86Assembler64::GPR8_REX)r.GetRegID(); } static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) @@ -33,13 +41,14 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; + uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -50,13 +59,14 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; + uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, 
imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -64,10 +74,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunct } } -void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, sint32 registerGQR) +void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR) { // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); // extract scale field and multiply by 16 to get array offset x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (isLoad?16:0)+8-4); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (0x3F<<4)); @@ -91,7 +101,7 @@ void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcI // generate code for PSQ load for a particular type // if scaleGQR is -1 then a scale of 1.0 is assumed (no scale) -void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR = -1) +void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID) { if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1) { @@ -227,16 +237,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, // convert the two integers to doubles x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext, registerXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR)); // scale - if (registerGQR >= 0) + if (registerGQR.IsValid()) PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, true, loadPS1, registerGQR); } } -void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR) +void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR) { bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1); // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); // extract load type field x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); @@ -292,11 +302,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen // load from memory bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - 
sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData; - sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; + sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData); + sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem); sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; + realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); uint8 mode = imlInstruction->op_storeLoad.mode; if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ) @@ -417,7 +427,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio return true; } -void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR = -1) +void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID) { bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 || @@ -425,7 +435,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 || mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1); bool isFloat = mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; - if (registerGQR >= 0) + if (registerGQR.IsValid()) { // move to temporary xmm and update registerXMM x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); @@ -543,11 +553,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext } } -void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR) +void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR) { bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1); // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); // extract store type field x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // jump cases @@ -602,11 +612,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe // store to memory bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData; - sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; + sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData); + sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem); sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; + realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); uint8 mode = 
imlInstruction->op_storeLoad.mode; if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 ) { @@ -713,163 +723,166 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg) // FPR op FPR void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); + uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA); + if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) { - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) { // VPUNPCKHQDQ - if (imlInstruction->op_fpr_r_r.regR == imlInstruction->op_fpr_r_r.regA) + if (regR == regA) { // unpack top to bottom and top - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, regR, regA); } //else if ( hasAVXSupport ) //{ // // unpack top to bottom and top with non-destructive destination // // update: On Ivy Bridge this causes weird stalls? - // x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, imlInstruction->op_fpr_r_r.registerOperand); + // x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, registerResult, registerOperand, registerOperand); //} else { // move top to bottom - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, regR, regA); // duplicate bottom - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regR); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR); } } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) { - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) { - if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA ) - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.regR); + if( regR != regA ) + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); + _swapPS0PS1(x64GenContext, regR); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) { - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA, 2); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 2); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) { // use unpckhpd here? 
- x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA, 3); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.regR); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 3); + _swapPS0PS1(x64GenContext, regR); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) { - x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) { - x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) { - x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_divpd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) { - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_addpd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) { - x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.regA); + x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA); x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); // move to FPR register - x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_TEMP); + x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) { // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // call assembly routine to calculate accurate FRES result in XMM15 x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // copy result to bottom and top half of result register - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, 
imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) { // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // call assembly routine to calculate accurate FRSQRTE result in XMM15 x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // copy result to bottom of result register - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) { // copy register - if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA ) + if( regR != regA ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); } // toggle sign bits - x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair)); + x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) { // copy register - if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA ) + if( regR != regA ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); } // set sign bit to 0 - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair)); + x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) { // calculate bottom half of result - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); else x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); // calculate top half of result // todo - this top to bottom copy can be optimized? 
- x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA, 3); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, regA, 3); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); // copy bottom to top + x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); // copy bottom to top } else { @@ -882,78 +895,82 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction */ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r.regR); + uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA); + uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB); + if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) { - if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA) + if (regR == regA) { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB); } - else if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB) + else if (regR == regB) { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA); } else { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB); } } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM) { // todo: Use AVX 3-operand VADDSD if available - if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA) + if (regR == regA) { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB); } - else if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB) + else if (regR == regB) { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA); } else { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB); } } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) { // registerResult = registerOperandA - registerOperandB - if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA ) + if( regR == regA ) { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB); } else if (g_CPUFeatures.x86.avx) { - 
x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, regR, regA, regB); } - else if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB ) + else if( regR == regB ) { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regB); - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, REG_RESV_FPR_TEMP); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); } else { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); + x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB); } } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA ) + if( regR == regA ) { - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB); } - else if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB ) + else if( regR == regB ) { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regA); - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regB); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, REG_RESV_FPR_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); } else { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA); - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB); } } else @@ -965,32 +982,37 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti */ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r_r.regR); + uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r_r.regA); + uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB); + uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC); + if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) { // todo: Investigate if there are other optimizations possible if the operand registers overlap // generic case // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // 2) add frB (both halfs, lower half is overwritten in the next step) - 
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regB); + x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); // 3) Interleave top of frTemp and frC - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regC); + x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC); // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, REG_RESV_FPR_TEMP); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) { // todo: Investigate if there are other optimizations possible if the operand registers overlap // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regB); + x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); // 3) Copy bottom from frC - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regC); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC); //// 4) Swap bottom and top half //x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, REG_RESV_FPR_TEMP); + x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); //float s0 = (float)hCPU->fpr[frC].fp0; //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); @@ -999,48 +1021,48 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) { - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); + x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC); sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB); // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) { // 
select bottom - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); + x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C bottom - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC); sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B bottom PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex()); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB); // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex()); // select top - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA); // copy top to bottom (todo: May cause stall?) + x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // copy top to bottom (todo: May cause stall?) x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C top - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC, 2); + //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandC); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regC, 2); sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B top PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex()); - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB, 2); + //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandB); + x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regB, 2); // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex()); } @@ -1050,38 +1072,40 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + uint32 regR = _regF64(imlInstruction->op_fpr_r.regR); + if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) { - 
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); + x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ) { - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); + x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ) { - x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); + x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ) { // convert to 32bit single - x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); + x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, regR, regR); // convert back to 64bit double - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) { // convert to 32bit singles - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); + x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, regR, regR); // convert back to 64bit doubles - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); + x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, regR, regR); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) { // convert bottom to 64bit double - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR); // copy to top half - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR); + x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR); } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index ca438c3ce..1b348c4cf 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -26,7 +26,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) // for non-BDNZ loops, check for common patterns // risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB) // this catches most loops with load-update and store-update instructions, but also those with decrementing counters - FixedSizeList list_modifiedRegisters; + FixedSizeList list_modifiedRegisters; for (const IMLInstruction& instIt : imlSegment->imlList) { if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB) ) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 54aa85a35..9511a5a7c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -41,20 +41,22 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return _tempOpcodename; } -void IMLDebug_AppendRegisterParam(StringBuf& strOutput, sint32 virtualRegister, bool isLast = false) +void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false) { + uint32 regId = virtualRegister.GetRegID(); + DEBUG_BREAK; // todo (print type) if (isLast) { - if (virtualRegister < 10) - strOutput.addFmt("t{} ", virtualRegister); + if (regId < 10) + strOutput.addFmt("t{} ", regId); else - strOutput.addFmt("t{}", virtualRegister); + strOutput.addFmt("t{}", regId); return; } - if (virtualRegister < 10) - strOutput.addFmt("t{} , ", virtualRegister); + if (regId < 10) + strOutput.addFmt("t{} , ", regId); else - strOutput.addFmt("t{}, ", virtualRegister); + strOutput.addFmt("t{}, ", regId); } void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) @@ -147,6 +149,12 @@ std::string IMLDebug_GetConditionName(IMLCondition cond) return "ukn"; } +std::string IMLDebug_GetRegName(IMLReg r) +{ + cemu_assert_unimplemented(); + return ""; +} + void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); @@ -197,7 +205,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); - strOutput.addFmt("name_{} (", inst.op_r_name.regR, inst.op_r_name.name); + strOutput.addFmt("name_{} (", inst.op_r_name.regR.GetRegID()); if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) { strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); @@ -334,9 +342,9 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); + strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.registerMem2.GetRegID()); else - strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); + strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); } else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { @@ -391,7 +399,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_FPR_R_NAME) { - strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.regR, inst.op_r_name.name); + strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.regR.GetRegID(), inst.op_r_name.name); if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) { strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); @@ -417,16 +425,16 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else strOutput.add("ukn"); - strOutput.addFmt(") = fpr_t{}", inst.op_r_name.regR); + strOutput.addFmt(") = {}", IMLDebug_GetRegName(inst.op_r_name.regR)); } else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) { - strOutput.addFmt("fpr_t{} = ", inst.op_storeLoad.registerData); + strOutput.addFmt("{} = ", 
IMLDebug_GetRegName(inst.op_storeLoad.registerData)); if (inst.op_storeLoad.flags2.signExtend) strOutput.add("S"); else strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32, inst.op_storeLoad.mode); + strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode); if (inst.op_storeLoad.flags2.notExpanded) { strOutput.addFmt(" "); @@ -438,23 +446,23 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.add("S"); else strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - strOutput.addFmt(" = fpr_t{} mode {}", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); + strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); + strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.regR, inst.op_fpr_r_r.regA); + strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA)); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) { strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.regR, inst.op_fpr_r_r_r_r.regA, inst.op_fpr_r_r_r_r.regB, inst.op_fpr_r_r_r_r.regC); + strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC)); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) { strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.regR, inst.op_fpr_r_r_r.regA, inst.op_fpr_r_r_r.regB); + strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB)); } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { @@ -462,7 +470,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - strOutput.addFmt("t{} ", inst.op_conditional_r_s32.regR); + strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR)); bool displayAsHex = false; if (inst.operation == PPCREC_IML_OP_ASSIGN) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index b7ab28e90..61939a244 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -161,7 +161,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (operation == PPCREC_IML_MACRO_B_TO_REG) { - registersUsed->readGPR1 = op_macro.param; + cemu_assert_debug(op_macro.paramReg.IsValid()); + registersUsed->readGPR1 = op_macro.paramReg; } else cemu_assert_unimplemented(); @@ -188,29 +189,29 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == 
PPCREC_IML_TYPE_LOAD) { registersUsed->writtenGPR1 = op_storeLoad.registerData; - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; } else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) { registersUsed->writtenGPR1 = op_storeLoad.registerData; - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem2.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem2; } else if (type == PPCREC_IML_TYPE_STORE) { registersUsed->readGPR1 = op_storeLoad.registerData; - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem; } else if (type == PPCREC_IML_TYPE_STORE_INDEXED) { registersUsed->readGPR1 = op_storeLoad.registerData; - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem; - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem2.IsValid()) registersUsed->readGPR3 = op_storeLoad.registerMem2; } else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) @@ -235,20 +236,20 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // fpr load operation registersUsed->writtenFPR1 = op_storeLoad.registerData; // address is in gpr register - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; // determine partially written result switch (op_storeLoad.mode) { case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); registersUsed->readGPR2 = op_storeLoad.registerGQR; break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same registersUsed->readFPR4 = op_storeLoad.registerData; - cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: @@ -261,7 +262,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: - cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); break; default: cemu_assert_unimplemented(); @@ -272,21 +273,21 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // fpr load operation registersUsed->writtenFPR1 = op_storeLoad.registerData; // address is in gpr registers - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem2.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem2; // determine partially written result switch (op_storeLoad.mode) { case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR != 
PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); registersUsed->readGPR3 = op_storeLoad.registerGQR; break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same - cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); registersUsed->readFPR4 = op_storeLoad.registerData; break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: @@ -299,7 +300,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); break; default: cemu_assert_unimplemented(); @@ -309,18 +310,18 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { // fpr store operation registersUsed->readFPR1 = op_storeLoad.registerData; - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; // PSQ generic stores also access GQR switch (op_storeLoad.mode) { case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); registersUsed->readGPR2 = op_storeLoad.registerGQR; break; default: - cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); break; } } @@ -329,20 +330,20 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // fpr store operation registersUsed->readFPR1 = op_storeLoad.registerData; // address is in gpr registers - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem2.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem2; // PSQ generic stores also access GQR switch (op_storeLoad.mode) { case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); registersUsed->readGPR3 = op_storeLoad.registerGQR; break; default: - cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); + cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); break; } } @@ -473,92 +474,107 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } } -#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) +//#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) +IMLReg replaceRegisterId(IMLReg reg, IMLRegID oldId, IMLRegID newId) +{ + if (reg.GetRegID() != oldId) + return reg; + reg.SetRegID(newId); + return reg; +} -sint32 replaceRegisterMultiple(sint32 reg, const std::unordered_map& translationTable) +IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map& translationTable) { - const auto& it = translationTable.find(reg); + if (reg.IsInvalid()) + return reg; + const auto& it = translationTable.find(reg.GetRegID()); cemu_assert_debug(it != translationTable.cend()); - return it->second; + IMLReg alteredReg = reg; + alteredReg.SetRegID(it->second); + return 
alteredReg; } -sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) +IMLReg replaceRegisterIdMultiple(IMLReg reg, IMLReg match[4], IMLReg replaced[4]) { // deprecated but still used for FPRs for (sint32 i = 0; i < 4; i++) { - if (match[i] < 0) + if (match[i].IsInvalid()) continue; - if (reg == match[i]) + if (reg.GetRegID() == match[i].GetRegID()) { + cemu_assert_debug(reg.GetBaseFormat() == match[i].GetBaseFormat()); + cemu_assert_debug(reg.GetRegFormat() == match[i].GetRegFormat()); + cemu_assert_debug(reg.GetBaseFormat() == replaced[i].GetBaseFormat()); + cemu_assert_debug(reg.GetRegFormat() == replaced[i].GetRegFormat()); + return replaced[i]; } } return reg; } -//void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) -void IMLInstruction::RewriteGPR(const std::unordered_map& translationTable) +void IMLInstruction::RewriteGPR(const std::unordered_map& translationTable) { if (type == PPCREC_IML_TYPE_R_NAME) { - op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, translationTable); + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_NAME_R) { - op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, translationTable); + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_R_R) { - op_r_r.regR = replaceRegisterMultiple(op_r_r.regR, translationTable); - op_r_r.regA = replaceRegisterMultiple(op_r_r.regA, translationTable); + op_r_r.regR = replaceRegisterIdMultiple(op_r_r.regR, translationTable); + op_r_r.regA = replaceRegisterIdMultiple(op_r_r.regA, translationTable); } else if (type == PPCREC_IML_TYPE_R_S32) { - op_r_immS32.regR = replaceRegisterMultiple(op_r_immS32.regR, translationTable); + op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - op_conditional_r_s32.regR = replaceRegisterMultiple(op_conditional_r_s32.regR, translationTable); + op_conditional_r_s32.regR = replaceRegisterIdMultiple(op_conditional_r_s32.regR, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_S32) { - op_r_r_s32.regR = replaceRegisterMultiple(op_r_r_s32.regR, translationTable); - op_r_r_s32.regA = replaceRegisterMultiple(op_r_r_s32.regA, translationTable); + op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable); + op_r_r_s32.regA = replaceRegisterIdMultiple(op_r_r_s32.regA, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) { - op_r_r_s32_carry.regR = replaceRegisterMultiple(op_r_r_s32_carry.regR, translationTable); - op_r_r_s32_carry.regA = replaceRegisterMultiple(op_r_r_s32_carry.regA, translationTable); - op_r_r_s32_carry.regCarry = replaceRegisterMultiple(op_r_r_s32_carry.regCarry, translationTable); + op_r_r_s32_carry.regR = replaceRegisterIdMultiple(op_r_r_s32_carry.regR, translationTable); + op_r_r_s32_carry.regA = replaceRegisterIdMultiple(op_r_r_s32_carry.regA, translationTable); + op_r_r_s32_carry.regCarry = replaceRegisterIdMultiple(op_r_r_s32_carry.regCarry, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_R) { - op_r_r_r.regR = replaceRegisterMultiple(op_r_r_r.regR, translationTable); - op_r_r_r.regA = replaceRegisterMultiple(op_r_r_r.regA, translationTable); - op_r_r_r.regB = replaceRegisterMultiple(op_r_r_r.regB, translationTable); + op_r_r_r.regR = replaceRegisterIdMultiple(op_r_r_r.regR, translationTable); + op_r_r_r.regA = 
replaceRegisterIdMultiple(op_r_r_r.regA, translationTable); + op_r_r_r.regB = replaceRegisterIdMultiple(op_r_r_r.regB, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) { - op_r_r_r_carry.regR = replaceRegisterMultiple(op_r_r_r_carry.regR, translationTable); - op_r_r_r_carry.regA = replaceRegisterMultiple(op_r_r_r_carry.regA, translationTable); - op_r_r_r_carry.regB = replaceRegisterMultiple(op_r_r_r_carry.regB, translationTable); - op_r_r_r_carry.regCarry = replaceRegisterMultiple(op_r_r_r_carry.regCarry, translationTable); + op_r_r_r_carry.regR = replaceRegisterIdMultiple(op_r_r_r_carry.regR, translationTable); + op_r_r_r_carry.regA = replaceRegisterIdMultiple(op_r_r_r_carry.regA, translationTable); + op_r_r_r_carry.regB = replaceRegisterIdMultiple(op_r_r_r_carry.regB, translationTable); + op_r_r_r_carry.regCarry = replaceRegisterIdMultiple(op_r_r_r_carry.regCarry, translationTable); } else if (type == PPCREC_IML_TYPE_COMPARE) { - op_compare.regR = replaceRegisterMultiple(op_compare.regR, translationTable); - op_compare.regA = replaceRegisterMultiple(op_compare.regA, translationTable); - op_compare.regB = replaceRegisterMultiple(op_compare.regB, translationTable); + op_compare.regR = replaceRegisterIdMultiple(op_compare.regR, translationTable); + op_compare.regA = replaceRegisterIdMultiple(op_compare.regA, translationTable); + op_compare.regB = replaceRegisterIdMultiple(op_compare.regB, translationTable); } else if (type == PPCREC_IML_TYPE_COMPARE_S32) { - op_compare_s32.regR = replaceRegisterMultiple(op_compare_s32.regR, translationTable); - op_compare_s32.regA = replaceRegisterMultiple(op_compare_s32.regA, translationTable); + op_compare_s32.regR = replaceRegisterIdMultiple(op_compare_s32.regR, translationTable); + op_compare_s32.regA = replaceRegisterIdMultiple(op_compare_s32.regA, translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - op_conditional_jump.registerBool = replaceRegisterMultiple(op_conditional_jump.registerBool, translationTable); + op_conditional_jump.registerBool = replaceRegisterIdMultiple(op_conditional_jump.registerBool, translationTable); } else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) { @@ -576,7 +592,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl } else if (operation == PPCREC_IML_MACRO_B_TO_REG) { - op_macro.param = replaceRegisterMultiple(op_macro.param, translationTable); + op_macro.paramReg = replaceRegisterIdMultiple(op_macro.paramReg, translationTable); } else { @@ -585,40 +601,40 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl } else if (type == PPCREC_IML_TYPE_LOAD) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); } } else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - 
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + if (op_storeLoad.registerMem2.IsValid()) + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); } else if (type == PPCREC_IML_TYPE_STORE) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); } else if (type == PPCREC_IML_TYPE_STORE_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + if (op_storeLoad.registerMem2.IsValid()) + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); } else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - op_atomic_compare_store.regEA = replaceRegisterMultiple(op_atomic_compare_store.regEA, translationTable); - op_atomic_compare_store.regCompareValue = replaceRegisterMultiple(op_atomic_compare_store.regCompareValue, translationTable); - op_atomic_compare_store.regWriteValue = replaceRegisterMultiple(op_atomic_compare_store.regWriteValue, translationTable); - op_atomic_compare_store.regBoolOut = replaceRegisterMultiple(op_atomic_compare_store.regBoolOut, translationTable); + op_atomic_compare_store.regEA = replaceRegisterIdMultiple(op_atomic_compare_store.regEA, translationTable); + op_atomic_compare_store.regCompareValue = replaceRegisterIdMultiple(op_atomic_compare_store.regCompareValue, translationTable); + op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable); + op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -630,54 +646,54 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); } - if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerGQR.IsValid()) { - op_storeLoad.registerGQR = 
replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); } - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem2.IsValid()) { - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); } - if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerGQR.IsValid()) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_STORE) { - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); } - if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerGQR.IsValid()) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { - if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem.IsValid()) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); } - if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerMem2.IsValid()) { - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); } - if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) + if (op_storeLoad.registerGQR.IsValid()) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_R_R) @@ -694,7 +710,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl } else if (type == PPCREC_IML_TYPE_FPR_COMPARE) { - op_fpr_compare.regR = replaceRegisterMultiple(op_fpr_compare.regR, translationTable); + op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable); } else { @@ -702,7 +718,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl } } -void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) +void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]) { if (type == PPCREC_IML_TYPE_R_NAME) { @@ -766,54 +782,54 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 
fprRegist } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { - op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_NAME_R) { - op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_STORE) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R) { - op_fpr_r_r.regR = replaceRegisterMultiple(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r.regA = replaceRegisterMultiple(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { - op_fpr_r_r_r.regR = replaceRegisterMultiple(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regA = replaceRegisterMultiple(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regB = replaceRegisterMultiple(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - op_fpr_r_r_r_r.regR = replaceRegisterMultiple(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regA = replaceRegisterMultiple(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regB = replaceRegisterMultiple(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regC = replaceRegisterMultiple(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regA = 
replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R) { - op_fpr_r.regR = replaceRegisterMultiple(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_COMPARE) { - op_fpr_compare.regA = replaceRegisterMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_compare.regB = replaceRegisterMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced); } else { @@ -821,7 +837,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist } } -void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced) +void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced) { if (type == PPCREC_IML_TYPE_R_NAME) { @@ -885,49 +901,49 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { - op_r_name.regR = replaceRegister(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_NAME_R) { - op_r_name.regR = replaceRegister(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); + op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { - op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { - op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_STORE) { - op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { - op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R) { - op_fpr_r_r.regR = replaceRegister(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r.regA = replaceRegister(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regR = replaceRegisterId(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r.regA = replaceRegisterId(op_fpr_r_r.regA, fprRegisterSearched, 
fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { - op_fpr_r_r_r.regR = replaceRegister(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regA = replaceRegister(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regB = replaceRegister(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regR = replaceRegisterId(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regA = replaceRegisterId(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r.regB = replaceRegisterId(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - op_fpr_r_r_r_r.regR = replaceRegister(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regA = replaceRegister(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regB = replaceRegister(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regC = replaceRegister(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regR = replaceRegisterId(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regA = replaceRegisterId(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regB = replaceRegisterId(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r_r_r_r.regC = replaceRegisterId(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_FPR_R) { - op_fpr_r.regR = replaceRegister(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); + op_fpr_r.regR = replaceRegisterId(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index feb494b29..9dec696d3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -1,7 +1,103 @@ #pragma once -using IMLReg = uint8; -inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1; +using IMLRegID = uint16; // 16 bit ID + +// format of IMLReg: +// 0-15 (16 bit) IMLRegID +// 19-23 (5 bit) Offset In elements, for SIMD registers +// 24-27 (4 bit) IMLRegFormat RegFormat +// 28-31 (4 bit) IMLRegFormat BaseFormat + +enum class IMLRegFormat : uint8 +{ + INVALID_FORMAT, + I64, + I32, + I16, + I8, + // I1 ? 
+ F64, + F32 +}; + +class IMLReg +{ +public: + IMLReg() + { + m_raw = 0; // 0 is invalid + } + + IMLReg(IMLRegFormat baseRegFormat, IMLRegFormat regFormat, uint8 viewOffset, IMLRegID regId) + { + m_raw = 0; + m_raw |= ((uint8)baseRegFormat << 28); + m_raw |= ((uint8)regFormat << 24); + m_raw |= (uint32)regId; + } + + IMLReg(IMLReg&& baseReg, IMLRegFormat viewFormat, uint8 viewOffset, IMLRegID regId) + { + DEBUG_BREAK; + //m_raw = 0; + //m_raw |= ((uint8)baseRegFormat << 28); + //m_raw |= ((uint8)viewFormat << 24); + //m_raw |= (uint32)regId; + } + + IMLReg(const IMLReg& other) : m_raw(other.m_raw) {} + + IMLRegFormat GetBaseFormat() const + { + return (IMLRegFormat)((m_raw >> 28) & 0xF); + } + + IMLRegFormat GetRegFormat() const + { + return (IMLRegFormat)((m_raw >> 24) & 0xF); + } + + IMLRegID GetRegID() const + { + cemu_assert_debug(GetBaseFormat() != IMLRegFormat::INVALID_FORMAT); + cemu_assert_debug(GetRegFormat() != IMLRegFormat::INVALID_FORMAT); + return (IMLRegID)(m_raw & 0xFFFF); + } + + void SetRegID(IMLRegID regId) + { + cemu_assert_debug(regId <= 0xFFFF); + m_raw &= ~0xFFFF; + m_raw |= (uint32)regId; + } + + bool IsInvalid() const + { + return GetBaseFormat() == IMLRegFormat::INVALID_FORMAT; + } + + bool IsValid() const + { + return GetBaseFormat() != IMLRegFormat::INVALID_FORMAT; + } + + bool IsValidAndSameRegID(IMLRegID regId) const + { + return IsValid() && GetRegID() == regId; + } + + // risky + bool operator==(const IMLReg& other) const + { + //__debugbreak(); + return m_raw == other.m_raw; + } + +private: + uint32 m_raw; +}; + +static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0); using IMLName = uint32; @@ -230,6 +326,8 @@ enum struct IMLUsedRegisters { + IMLUsedRegisters() {}; + // GPR union { @@ -256,59 +354,63 @@ struct IMLUsedRegisters }; }; - bool IsGPRWritten(IMLReg imlReg) const + bool IsBaseGPRWritten(IMLReg imlReg) const { - cemu_assert_debug(imlReg != IMLREG_INVALID); - return writtenGPR1 == imlReg || writtenGPR2 == imlReg; + cemu_assert_debug(imlReg.IsValid()); + auto regId = imlReg.GetRegID(); + if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId) + return true; + if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId) + return true; + return false; } template void ForEachWrittenGPR(Fn F) const { - if (writtenGPR1 != IMLREG_INVALID) + if (writtenGPR1.IsValid()) F(writtenGPR1); - if (writtenGPR2 != IMLREG_INVALID) + if (writtenGPR2.IsValid()) F(writtenGPR2); } template void ForEachReadGPR(Fn F) const { - if (readGPR1 != IMLREG_INVALID) + if (readGPR1.IsValid()) F(readGPR1); - if (readGPR2 != IMLREG_INVALID) + if (readGPR2.IsValid()) F(readGPR2); - if (readGPR3 != IMLREG_INVALID) + if (readGPR3.IsValid()) F(readGPR3); } template void ForEachAccessedGPR(Fn F) const { - if (readGPR1 != IMLREG_INVALID) + if (readGPR1.IsValid()) F(readGPR1, false); - if (readGPR2 != IMLREG_INVALID) + if (readGPR2.IsValid()) F(readGPR2, false); - if (readGPR3 != IMLREG_INVALID) + if (readGPR3.IsValid()) F(readGPR3, false); - if (writtenGPR1 != IMLREG_INVALID) + if (writtenGPR1.IsValid()) F(writtenGPR1, true); - if (writtenGPR2 != IMLREG_INVALID) + if (writtenGPR2.IsValid()) F(writtenGPR2, true); } - bool HasFPRReg(sint16 imlReg) const + bool HasSameBaseFPRRegId(IMLRegID regId) const { - cemu_assert_debug(imlReg != IMLREG_INVALID); - if (readFPR1 == imlReg) + if (readFPR1.IsValid() && readFPR1.GetRegID() == regId) return true; - if (readFPR2 == imlReg) + if (readFPR2.IsValid() && readFPR2.GetRegID() == regId) return true; 
- if (readFPR3 == imlReg) + if (readFPR3.IsValid() && readFPR3.GetRegID() == regId) return true; - if (readFPR4 == imlReg) + if (readFPR4.IsValid() && readFPR4.GetRegID() == regId) return true; - if (writtenFPR1 == imlReg) + if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId) return true; return false; } @@ -316,6 +418,12 @@ struct IMLUsedRegisters struct IMLInstruction { + IMLInstruction() {} + IMLInstruction(const IMLInstruction& other) + { + memcpy(this, &other, sizeof(IMLInstruction)); + } + uint8 type; uint8 operation; union @@ -370,6 +478,7 @@ struct IMLInstruction uint32 param; uint32 param2; uint16 paramU16; + IMLReg paramReg; }op_macro; struct { @@ -446,7 +555,7 @@ struct IMLInstruction struct { // r_s32 - uint8 regR; + IMLReg regR; sint32 immS32; // condition uint8 crRegisterIndex; @@ -479,16 +588,17 @@ struct IMLInstruction void make_debugbreak(uint32 currentPPCAddress = 0) { - make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0); + make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID); } - void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) + void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16, IMLReg regParam) { this->type = PPCREC_IML_TYPE_MACRO; this->operation = macroId; this->op_macro.param = param; this->op_macro.param2 = param2; this->op_macro.paramU16 = paramU16; + this->op_macro.paramReg = regParam; } void make_cjump_cycle_check() @@ -497,85 +607,85 @@ struct IMLInstruction this->operation = 0; } - void make_r_r(uint32 operation, uint8 registerResult, uint8 registerA) + void make_r_r(uint32 operation, IMLReg regR, IMLReg regA) { this->type = PPCREC_IML_TYPE_R_R; this->operation = operation; - this->op_r_r.regR = registerResult; - this->op_r_r.regA = registerA; + this->op_r_r.regR = regR; + this->op_r_r.regA = regA; } - void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32) + void make_r_s32(uint32 operation, IMLReg regR, sint32 immS32) { this->type = PPCREC_IML_TYPE_R_S32; this->operation = operation; - this->op_r_immS32.regR = registerIndex; + this->op_r_immS32.regR = regR; this->op_r_immS32.immS32 = immS32; } - void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB) + void make_r_r_r(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB) { this->type = PPCREC_IML_TYPE_R_R_R; this->operation = operation; - this->op_r_r_r.regR = registerResult; - this->op_r_r_r.regA = registerA; - this->op_r_r_r.regB = registerB; + this->op_r_r_r.regR = regR; + this->op_r_r_r.regA = regA; + this->op_r_r_r.regB = regB; } - void make_r_r_r_carry(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 registerCarry) + void make_r_r_r_carry(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB, IMLReg regCarry) { this->type = PPCREC_IML_TYPE_R_R_R_CARRY; this->operation = operation; - this->op_r_r_r_carry.regR = registerResult; - this->op_r_r_r_carry.regA = registerA; - this->op_r_r_r_carry.regB = registerB; - this->op_r_r_r_carry.regCarry = registerCarry; + this->op_r_r_r_carry.regR = regR; + this->op_r_r_r_carry.regA = regA; + this->op_r_r_r_carry.regB = regB; + this->op_r_r_r_carry.regCarry = regCarry; } - void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32) + void make_r_r_s32(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32) { this->type = PPCREC_IML_TYPE_R_R_S32; this->operation = operation; - this->op_r_r_s32.regR = registerResult; - this->op_r_r_s32.regA = 
registerA; + this->op_r_r_s32.regR = regR; + this->op_r_r_s32.regA = regA; this->op_r_r_s32.immS32 = immS32; } - void make_r_r_s32_carry(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 registerCarry) + void make_r_r_s32_carry(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32, IMLReg regCarry) { this->type = PPCREC_IML_TYPE_R_R_S32_CARRY; this->operation = operation; - this->op_r_r_s32_carry.regR = registerResult; - this->op_r_r_s32_carry.regA = registerA; + this->op_r_r_s32_carry.regR = regR; + this->op_r_r_s32_carry.regA = regA; this->op_r_r_s32_carry.immS32 = immS32; - this->op_r_r_s32_carry.regCarry = registerCarry; + this->op_r_r_s32_carry.regCarry = regCarry; } - void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond) + void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) { this->type = PPCREC_IML_TYPE_COMPARE; this->operation = -999; - this->op_compare.regR = registerResult; - this->op_compare.regA = registerA; - this->op_compare.regB = registerB; + this->op_compare.regR = regR; + this->op_compare.regA = regA; + this->op_compare.regB = regB; this->op_compare.cond = cond; } - void make_compare_s32(uint8 registerA, sint32 immS32, uint8 registerResult, IMLCondition cond) + void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond) { this->type = PPCREC_IML_TYPE_COMPARE_S32; this->operation = -999; - this->op_compare_s32.regR = registerResult; - this->op_compare_s32.regA = registerA; + this->op_compare_s32.regR = regR; + this->op_compare_s32.regA = regA; this->op_compare_s32.immS32 = immS32; this->op_compare_s32.cond = cond; } - void make_conditional_jump(uint8 registerBool, bool mustBeTrue) + void make_conditional_jump(IMLReg regBool, bool mustBeTrue) { this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; this->operation = -999; - this->op_conditional_jump.registerBool = registerBool; + this->op_conditional_jump.registerBool = regBool; this->op_conditional_jump.mustBeTrue = mustBeTrue; } @@ -586,12 +696,12 @@ struct IMLInstruction } // load from memory - void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) + void make_r_memory(IMLReg regD, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { this->type = PPCREC_IML_TYPE_LOAD; this->operation = 0; - this->op_storeLoad.registerData = registerDestination; - this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.registerData = regD; + this->op_storeLoad.registerMem = regMem; this->op_storeLoad.immS32 = immS32; this->op_storeLoad.copyWidth = copyWidth; this->op_storeLoad.flags2.swapEndian = switchEndian; @@ -599,12 +709,12 @@ struct IMLInstruction } // store to memory - void make_memory_r(uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) + void make_memory_r(IMLReg regS, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool switchEndian) { this->type = PPCREC_IML_TYPE_STORE; this->operation = 0; - this->op_storeLoad.registerData = registerSource; - this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.registerData = regS; + this->op_storeLoad.registerMem = regMem; this->op_storeLoad.immS32 = immS32; this->op_storeLoad.copyWidth = copyWidth; this->op_storeLoad.flags2.swapEndian = switchEndian; @@ -633,7 +743,8 @@ struct IMLInstruction void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; - void RewriteGPR(const std::unordered_map& 
translationTable); - void ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]); - void ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced); + void RewriteGPR(const std::unordered_map& translationTable); + void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]); + void ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced); + }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 7a5ad3798..568c0b791 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -8,13 +8,18 @@ bool _RegExceedsFPRSpace(IMLReg r) { - if (r == IMLREG_INVALID) + if (r.IsInvalid()) return false; - if ((uint32)r >= PPC_X64_FPR_USABLE_REGISTERS) + if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS) return true; return false; } +IMLReg _FPRRegFromID(IMLRegID regId) +{ + return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); +} + bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) { // only xmm0 to xmm14 may be used, xmm15 is reserved @@ -48,34 +53,34 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte while( true ) { segIt->imlList[imlIndex].CheckRegisterUsage(®istersUsed); - if(registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS) + if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) ) { // get index of register to replace sint32 fprToReplace = -1; if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) - fprToReplace = registersUsed.readFPR1; + fprToReplace = registersUsed.readFPR1.GetRegID(); else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) - fprToReplace = registersUsed.readFPR2; + fprToReplace = registersUsed.readFPR2.GetRegID(); else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) - fprToReplace = registersUsed.readFPR3; + fprToReplace = registersUsed.readFPR3.GetRegID(); else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) - fprToReplace = registersUsed.readFPR4; + fprToReplace = registersUsed.readFPR4.GetRegID(); else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) - fprToReplace = registersUsed.writtenFPR1; + fprToReplace = registersUsed.writtenFPR1.GetRegID(); if (fprToReplace >= 0) { // generate mask of useable registers uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 - if (registersUsed.readFPR1 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR1)); - if (registersUsed.readFPR2 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR2)); - if (registersUsed.readFPR3 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR3)); - if (registersUsed.readFPR4 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR4)); - if (registersUsed.writtenFPR1 != -1) - useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1)); + if (registersUsed.readFPR1.IsValid()) + useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID())); + if (registersUsed.readFPR2.IsValid()) + useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID())); + if 
(registersUsed.readFPR3.IsValid()) + useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID())); + if (registersUsed.readFPR4.IsValid()) + useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID())); + if (registersUsed.writtenFPR1.IsValid()) + useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID())); // get highest unused register index (0-6 range) sint32 unusedRegisterIndex = -1; for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++) @@ -107,7 +112,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte { imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; } else @@ -116,14 +121,14 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; // name_gprToReplace = unusedRegister imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; // unusedRegister = name_unusedRegister imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); @@ -132,7 +137,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte { imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = unusedRegisterIndex; + imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; } else @@ -190,8 +195,8 @@ ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegist continue; if( (unloadLockedMask&(1<currentMapping[i].virtualReg; - bool isReserved = instructionUsedRegisters->HasFPRReg(virtualReg); + IMLRegID virtualReg = rCtx->currentMapping[i].virtualReg; + bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg); if (isReserved) continue; if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) @@ -219,22 +224,22 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon if (idxInst.IsSuffixInstruction()) break; idxInst.CheckRegisterUsage(®istersUsed); - sint32 fprMatch[4]; // should be IMLReg, but this code is being dropped soon anyway - sint32 fprReplace[4]; - fprMatch[0] = -1; // should be IMLREG_INVALID - fprMatch[1] = -1; - fprMatch[2] = -1; - fprMatch[3] = -1; - fprReplace[0] = -1; - fprReplace[1] = -1; - fprReplace[2] = -1; - fprReplace[3] = -1; + IMLReg fprMatch[4]; + IMLReg fprReplace[4]; + fprMatch[0] = IMLREG_INVALID; + fprMatch[1] = IMLREG_INVALID; + fprMatch[2] = IMLREG_INVALID; + fprMatch[3] = 
IMLREG_INVALID; + fprReplace[0] = IMLREG_INVALID; + fprReplace[1] = IMLREG_INVALID; + fprReplace[2] = IMLREG_INVALID; + fprReplace[3] = IMLREG_INVALID; // generate a mask of registers that we may not free sint32 numReplacedOperands = 0; uint32 unloadLockedMask = 0; for (sint32 f = 0; f < 5; f++) { - sint32 virtualFpr; + IMLReg virtualFpr; if (f == 0) virtualFpr = registersUsed.readFPR1; else if (f == 1) @@ -245,12 +250,14 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon virtualFpr = registersUsed.readFPR4; else if (f == 4) virtualFpr = registersUsed.writtenFPR1; - if(virtualFpr == IMLREG_INVALID) + if(virtualFpr.IsInvalid()) continue; - cemu_assert_debug(virtualFpr < 64); + cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64); + cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64); + cemu_assert_debug(virtualFpr.GetRegID() < 64); // check if this virtual FPR is already loaded in any real register ppcRecRegisterMapping_t* regMapping; - if (rCtx.ppcRegToMapping[virtualFpr] == -1) + if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1) { // not loaded // find available register @@ -269,7 +276,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = (uint8)(unloadRegMapping - rCtx.currentMapping); + imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping)); imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; idx++; // update mapping @@ -285,18 +292,18 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = (uint8)(regMapping-rCtx.currentMapping); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr]; + imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping)); + imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()]; idx++; // update mapping - regMapping->virtualReg = virtualFpr; - rCtx.ppcRegToMapping[virtualFpr] = (sint32)(regMapping - rCtx.currentMapping); + regMapping->virtualReg = virtualFpr.GetRegID(); + rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping); regMapping->lastUseIndex = rCtx.currentUseIndex; rCtx.currentUseIndex++; } else { - regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr]; + regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()]; regMapping->lastUseIndex = rCtx.currentUseIndex; rCtx.currentUseIndex++; } @@ -304,9 +311,9 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon bool entryFound = false; for (sint32 t = 0; t < numReplacedOperands; t++) { - if (fprMatch[t] == virtualFpr) + if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID()) { - cemu_assert_debug(fprReplace[t] == (regMapping - rCtx.currentMapping)); + cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping)); entryFound = true; break; } @@ -315,7 +322,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon 
{ cemu_assert_debug(numReplacedOperands != 4); fprMatch[numReplacedOperands] = virtualFpr; - fprReplace[numReplacedOperands] = (sint32)(regMapping - rCtx.currentMapping); + fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping); numReplacedOperands++; } } @@ -345,7 +352,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = i; + imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i); imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; idx++; } @@ -377,7 +384,7 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG imlInstruction->CheckRegisterUsage(®istersUsed); if( registersUsed.readGPR1 == registerIndex || registersUsed.readGPR2 == registerIndex || registersUsed.readGPR3 == registerIndex ) return false; - if (registersUsed.IsGPRWritten(registerIndex)) + if (registersUsed.IsBaseGPRWritten(registerIndex)) return true; } // todo: Scan next segment(s) @@ -389,15 +396,15 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG */ bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { - sint16 registerIndex = nameStoreInstruction->op_r_name.regR; + IMLRegID regId = nameStoreInstruction->op_r_name.regR.GetRegID(); for(size_t i=startIndex; iimlList.size(); i++) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; IMLUsedRegisters registersUsed; imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.readFPR1 == registerIndex || registersUsed.readFPR2 == registerIndex || registersUsed.readFPR3 == registerIndex || registersUsed.readFPR4 == registerIndex) + if( registersUsed.readFPR1.IsValidAndSameRegID(regId) || registersUsed.readFPR2.IsValidAndSameRegID(regId) || registersUsed.readFPR3.IsValidAndSameRegID(regId) || registersUsed.readFPR4.IsValidAndSameRegID(regId)) return false; - if( registersUsed.writtenFPR1 == registerIndex ) + if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId) ) return true; } // todo: Scan next segment(s) @@ -409,13 +416,13 @@ bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcI */ bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { - sint16 registerIndex = nameStoreInstruction->op_r_name.regR; + IMLReg regR = nameStoreInstruction->op_r_name.regR; for(sint32 i=startIndex; i>=0; i--) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; IMLUsedRegisters registersUsed; imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.IsGPRWritten(registerIndex) ) + if( registersUsed.IsBaseGPRWritten(regR) ) { if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) return true; @@ -468,13 +475,13 @@ bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcI */ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) { - sint16 registerIndex = nameStoreInstruction->op_r_name.regR; + IMLRegID regId = 
nameStoreInstruction->op_r_name.regR.GetRegID(); for(sint32 i=startIndex; i>=0; i--) { IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; IMLUsedRegisters registersUsed; imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.writtenFPR1 == registerIndex ) + if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId)) { if(imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) return true; @@ -485,8 +492,10 @@ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppc return false; } -void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) +void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg) { + IMLRegID fprIndex = fprReg.GetRegID(); + IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; if (imlInstructionLoad->op_storeLoad.flags2.notExpanded) return; @@ -504,7 +513,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0) || (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0)) { - if (imlInstruction->op_storeLoad.registerData == fprIndex) + if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex) { if (foundMatch == false) { @@ -524,15 +533,15 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI // check if FPR is overwritten (we can actually ignore read operations?) imlInstruction->CheckRegisterUsage(®istersUsed); - if (registersUsed.writtenFPR1 == fprIndex) + if (registersUsed.writtenFPR1.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR1 == fprIndex) + if (registersUsed.readFPR1.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR2 == fprIndex) + if (registersUsed.readFPR2.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR3 == fprIndex) + if (registersUsed.readFPR3.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR4 == fprIndex) + if (registersUsed.readFPR4.IsValidAndSameRegID(fprIndex)) break; } @@ -540,7 +549,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI { // insert expand instruction after store IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore); - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, fprIndex); + PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, _FPRRegFromID(fprIndex)); } } @@ -574,8 +583,12 @@ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContex } } -void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 gprIndex) +void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg) { + cemu_assert_debug(gprReg.GetBaseFormat() == IMLRegFormat::I64); // todo - proper handling required for non-standard sizes + cemu_assert_debug(gprReg.GetRegFormat() == IMLRegFormat::I32); + + IMLRegID gprIndex = gprReg.GetRegID(); 
IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false ) return; @@ -591,9 +604,9 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp // check if GPR is stored if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32 ) ) { - if (imlInstruction->op_storeLoad.registerMem == gprIndex) + if (imlInstruction->op_storeLoad.registerMem.GetRegID() == gprIndex) break; - if (imlInstruction->op_storeLoad.registerData == gprIndex) + if (imlInstruction->op_storeLoad.registerData.GetRegID() == gprIndex) { IMLInstruction* imlInstructionStore = imlInstruction; if (foundMatch == false) @@ -610,18 +623,18 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp } // check if GPR is accessed imlInstruction->CheckRegisterUsage(®istersUsed); - if (registersUsed.readGPR1 == gprIndex || - registersUsed.readGPR2 == gprIndex || - registersUsed.readGPR3 == gprIndex) + if (registersUsed.readGPR1.IsValidAndSameRegID(gprIndex) || + registersUsed.readGPR2.IsValidAndSameRegID(gprIndex) || + registersUsed.readGPR3.IsValidAndSameRegID(gprIndex)) { break; } - if (registersUsed.IsGPRWritten(gprIndex)) + if (registersUsed.IsBaseGPRWritten(gprReg)) return; // GPR overwritten, we don't need to byte swap anymore } if (foundMatch) { - PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex); + PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprReg, gprReg); } } @@ -650,15 +663,19 @@ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenCont } } -sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, sint32 registerIndex) +sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg) { - if (registerIndex == PPC_REC_INVALID_REGISTER) + if (gqrReg.IsInvalid()) return -1; - sint32 namedReg = ppcImlGenContext->mappedRegister[registerIndex]; + sint32 namedReg = ppcImlGenContext->mappedRegister[gqrReg.GetRegID()]; if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7)) { return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0); } + else + { + cemu_assert_suspicious(); + } return -1; } @@ -694,7 +711,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ) continue; // get GQR value - cemu_assert_debug(instIt.op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid()); sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); cemu_assert(gqrIndex >= 0); if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) @@ -720,7 +737,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; + instIt.op_storeLoad.registerGQR = IMLREG_INVALID; } else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) { @@ -735,7 +752,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; if (instIt.op_storeLoad.mode != 
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) - instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; + instIt.op_storeLoad.registerGQR = IMLREG_INVALID; } } else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) @@ -744,7 +761,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) continue; // get GQR value - cemu_assert_debug(instIt.op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); + cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid()); sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); cemu_assert(gqrIndex >= 0 && gqrIndex < 8); if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) @@ -769,7 +786,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; + instIt.op_storeLoad.registerGQR = IMLREG_INVALID; } else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) { @@ -784,7 +801,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; + instIt.op_storeLoad.registerGQR = IMLREG_INVALID; } } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 5d11d0c2a..351306ec5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -123,18 +123,23 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml typedef struct { - uint16 registerIndex; + IMLRegID registerIndex; uint16 registerName; }raLoadStoreInfo_t; -void PPCRecRA_insertGPRLoadInstruction(IMLSegment* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) +IMLReg _MakeNativeGPR(IMLRegID regId) +{ + return IMLReg(IMLRegFormat::I64, IMLRegFormat::I64, 0, regId); +} + +void PPCRecRA_insertGPRLoadInstruction(IMLSegment* imlSegment, sint32 insertIndex, IMLRegID registerIndex, sint32 registerName) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = registerIndex; + imlInstructionItr->op_r_name.regR = _MakeNativeGPR(registerIndex); imlInstructionItr->op_r_name.name = registerName; } @@ -147,19 +152,19 @@ void PPCRecRA_insertGPRLoadInstructions(IMLSegment* imlSegment, sint32 insertInd IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = (uint8)loadList[i].registerIndex; + imlInstructionItr->op_r_name.regR = _MakeNativeGPR(loadList[i].registerIndex); imlInstructionItr->op_r_name.name = (uint32)loadList[i].registerName; } } -void 
PPCRecRA_insertGPRStoreInstruction(IMLSegment* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) +void PPCRecRA_insertGPRStoreInstruction(IMLSegment* imlSegment, sint32 insertIndex, IMLRegID registerIndex, sint32 registerName) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = registerIndex; + imlInstructionItr->op_r_name.regR = _MakeNativeGPR(registerIndex); imlInstructionItr->op_r_name.name = registerName; } @@ -173,7 +178,7 @@ void PPCRecRA_insertGPRStoreInstructions(IMLSegment* imlSegment, sint32 insertIn memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = (uint8)storeList[i].registerIndex; + imlInstructionItr->op_r_name.regR = _MakeNativeGPR(storeList[i].registerIndex); imlInstructionItr->op_r_name.name = (uint32)storeList[i].registerName; } } @@ -368,7 +373,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) { - uint32 regId = regToSearch & 0xFF; + uint32 regId = regToSearch.GetRegID(); raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_perVirtualGPR[regId]; while (subrangeItr) { @@ -828,7 +833,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) virtualReg2PhysReg[i] = -1; - std::unordered_map virt2PhysRegMap; // key = virtual register, value = physical register + std::unordered_map virtId2PhysRegIdMap; // key = virtual register, value = physical register IMLRALivenessTimeline livenessTimeline; sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 
1 : 0; @@ -850,7 +855,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML assert_dbg(); #endif virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -866,7 +871,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML if (virtualReg2PhysReg[expiredRange->range->virtualRegister] == -1) assert_dbg(); virtualReg2PhysReg[expiredRange->range->virtualRegister] = -1; - virt2PhysRegMap.erase(expiredRange->range->virtualRegister); + virtId2PhysRegIdMap.erase(expiredRange->range->virtualRegister); // store GPR if required // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (expiredRange->hasStore) @@ -900,13 +905,13 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML // update translation table cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } // rewrite registers if (index < imlSegment->imlList.size()) - imlSegment->imlList[index].RewriteGPR(virt2PhysRegMap); + imlSegment->imlList[index].RewriteGPR(virtId2PhysRegIdMap); // next iml instruction index++; } @@ -919,7 +924,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML // update translation table cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - virt2PhysRegMap.erase(liverange->range->virtualRegister); + virtId2PhysRegIdMap.erase(liverange->range->virtualRegister); // store GPR if (liverange->hasStore) { @@ -951,7 +956,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML // update translation table cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -1063,7 +1068,8 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, while (index < imlSegment->imlList.size()) { imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) { + gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { + IMLRegID gprId = gprReg.GetRegID(); cemu_assert_debug(gprId < IML_RA_VIRT_REG_COUNT_MAX); imlSegment->raDistances.reg[gprId].usageStart = std::min(imlSegment->raDistances.reg[gprId].usageStart, index); // index before/at instruction imlSegment->raDistances.reg[gprId].usageEnd 
= std::max(imlSegment->raDistances.reg[gprId].usageEnd, index + 1); // index after instruction @@ -1156,7 +1162,8 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, while (index < imlSegment->imlList.size()) { imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) { + gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { + IMLRegID gprId = gprReg.GetRegID(); // add location PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 247cfa8b8..7ca247ba6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -270,9 +270,9 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) accessedTempReg[4] = registersUsed.writtenFPR1; for (sint32 f = 0; f < 5; f++) { - if (accessedTempReg[f] == IMLREG_INVALID) + if (accessedTempReg[f].IsInvalid()) continue; - uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]]; + uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()]; if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) { segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index ad4230015..042cf0675 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -15,15 +15,15 @@ void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint); // GPR register management -uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); +IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // FPR register management -uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); -uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); +IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); +IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); +void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); +void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, IMLReg registerResult); // IML generation - FPU bool 
PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index c37126d57..f89edfe35 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -53,7 +53,7 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext return &inst; } -void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) +void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { if(imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -70,8 +70,11 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet; } -void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) +void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { + cemu_assert_debug(registerMemory1.IsValid()); + cemu_assert_debug(registerMemory2.IsValid()); + cemu_assert_debug(registerDestination.IsValid()); IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED; imlInstruction->operation = 0; @@ -83,8 +86,11 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } -void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) +void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { + cemu_assert_debug(registerMemory1.IsValid()); + cemu_assert_debug(registerMemory2.IsValid()); + cemu_assert_debug(registerDestination.IsValid()); IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED; imlInstruction->operation = 0; @@ -188,14 +194,14 @@ uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcIm return PPC_REC_INVALID_REGISTER; } -uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); if (loadedRegisterIndex != PPC_REC_INVALID_REGISTER) - return loadedRegisterIndex; + 
return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, loadedRegisterIndex); uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName); - return registerIndex; + return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, registerIndex); } IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) @@ -225,7 +231,7 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index) // get throw-away register. Only valid for the scope of a single translated instruction // be careful to not collide with manually loaded temporary register -uint32 _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) +IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { cemu_assert_debug(index < 4); return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); @@ -235,29 +241,29 @@ uint32 _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) * Loads a PPC fpr into any of the available IML FPU registers * If loadNew is false, it will check first if the fpr is already loaded into any IML register */ -uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) +IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) { if( loadNew == false ) { uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; + return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); } uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return registerIndex; + return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); } /* * Checks if a PPC fpr register is already loaded into any IML register - * If no, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register) + * If not, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register) */ -uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; + return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return registerIndex; + return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); } bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) @@ -334,16 +340,16 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin } // for handling RC bit of many instructions -void PPCImlGen_UpdateCR0(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR) +void PPCImlGen_UpdateCR0(ppcImlGenContext_t* ppcImlGenContext, IMLReg regR) { IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_LT); IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT); IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, 0, 
Espresso::CR_BIT::CR_BIT_INDEX_EQ); - // todo - SO bit? + // todo - SO bit - ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegLT, IMLCondition::SIGNED_LT); - ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegGT, IMLCondition::SIGNED_GT); - ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegEQ, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegLT, IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegGT, IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegEQ, IMLCondition::EQ); //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, crBitRegSO, 0); // todo - copy from XER @@ -355,7 +361,7 @@ void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) // split before and after to make sure the macro is in an isolated segment that we can make enterable PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); ppcImlGenContext->currentOutputSegment->SetEnterable(ppcImlGenContext->ppcAddressOfCurrentInstruction); - PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); + PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID); IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); middleSeg->SetLinkBranchTaken(nullptr); middleSeg->SetLinkBranchNotTaken(nullptr); @@ -369,12 +375,12 @@ bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD); if (spr == SPR_CTR || spr == SPR_LR) { - uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); } else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) { - uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true; } @@ -391,12 +397,12 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD); if (spr == SPR_LR || spr == SPR_CTR) { - uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); } else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) { - uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); } else @@ -417,7 +423,7 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { // TBL / TBU uint32 param2 = spr | (rD << 16); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, 
ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0, IMLREG_INVALID); IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); return true; @@ -515,14 +521,14 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { // function call - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID); return true; } // is jump destination within recompiled function? if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) ppcImlGenContext->emitInst().make_jump(); else - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID); return true; } @@ -564,7 +570,7 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue); - blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); + blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID); return true; } return false; @@ -623,18 +629,18 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if (!BO.conditionIgnore()) regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit); - uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); + IMLReg branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); if (LK) { if (sprReg == SPR_LR) { // if the branch target is LR, then preserve it in a temporary cemu_assert_suspicious(); // this case needs testing - uint32 tmpRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + IMLReg tmpRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg); branchDestReg = tmpRegister; } - uint32 registerLR = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); + IMLReg registerLR = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } @@ -651,14 +657,14 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* bctrSeg = 
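// --- Illustrative reference (not from the patch): how the I-form branch target (jumpAddressDest)
// used by PPCRecompilerImlGen_B is defined architecturally. The recompiler computes it through its
// own helper, which is not shown in this hunk; names here are placeholders.
#include <cstdint>

static uint32_t BranchTargetRef(uint32_t opcode, uint32_t instructionAddress)
{
    int32_t displacement = static_cast<int32_t>(opcode & 0x03FFFFFC); // LI field, already shifted left by 2
    if (displacement & 0x02000000)                                    // sign-extend the 26-bit displacement
        displacement |= static_cast<int32_t>(0xFC000000);
    bool absolute = (opcode & 0x2) != 0;                              // AA bit
    return absolute ? static_cast<uint32_t>(displacement)
                    : instructionAddress + static_cast<uint32_t>(displacement);
}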
PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, !BO.conditionInverted()); - bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); + bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg); } else { // branch always, no condition and no decrementer check cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg); } return true; } @@ -879,9 +885,9 @@ bool PPCRecompilerImlGen_MULLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); - uint32 registerOperand = _GetRegGPR(ppcImlGenContext, rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand, (sint32)imm); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, (sint32)imm); return true; } @@ -889,17 +895,16 @@ bool PPCRecompilerImlGen_MULLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = hCPU->gpr[rA] * hCPU->gpr[rB]; - uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); - uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); - uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); if (opcode & PPC_OPC_OE) { return false; } - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, regB); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -907,12 +912,12 @@ bool PPCRecompilerImlGen_MULHW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); - uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); - uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, regD, regA, regB); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -920,12 +925,12 @@ bool PPCRecompilerImlGen_MULHWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); - uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); - uint32 registerOperand2 = 
_GetRegGPR(ppcImlGenContext, rB); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, regD, regA, regB); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -933,12 +938,12 @@ bool PPCRecompilerImlGen_DIVW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); - uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); - uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2); + IMLReg regR = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, regR, regA, regB); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, regR); return true; } @@ -946,13 +951,12 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = (uint32)a / (uint32)b; - uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD); - uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA); - uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, regD, regA, regB); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -962,30 +966,30 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); uint32 mask = ppc_mask(MB, ME); - uint32 registerRS = _GetRegGPR(ppcImlGenContext, rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if( ME == (31-SH) && MB == 0 ) { // SLWI - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regA, regS, SH); } else if( SH == (32-MB) && ME == 31 ) { // SRWI - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, registerRA, registerRS, MB); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regA, regS, MB); } else { // general handler - if (registerRA != registerRS) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRA, registerRS); + if (rA != rS) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS); if (SH != 0) - 
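// --- Illustrative reference (not from the patch): what PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED/UNSIGNED
// compute for mulhw/mulhwu above - the upper 32 bits of the full 64-bit product.
#include <cstdint>

static uint32_t MulhwRef(uint32_t a, uint32_t b)   // signed high word (mulhw)
{
    int64_t product = static_cast<int64_t>(static_cast<int32_t>(a)) * static_cast<int32_t>(b);
    return static_cast<uint32_t>(static_cast<uint64_t>(product) >> 32);
}

static uint32_t MulhwuRef(uint32_t a, uint32_t b)  // unsigned high word (mulhwu)
{
    uint64_t product = static_cast<uint64_t>(a) * b;
    return static_cast<uint32_t>(product >> 32);
}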
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regA, SH); if (mask != 0xFFFFFFFF) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerRA, registerRA, (sint32)mask); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask); } if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -994,13 +998,13 @@ bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opc int rS, rA, SH, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // pack RLWIMI parameters into single integer uint32 vImm = MB|(ME<<8)|(SH<<16); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, regA, regS, (sint32)vImm); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1009,14 +1013,14 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA, rB, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME); uint32 mask = ppc_mask(MB, ME); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); + IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, regA, regS, regB); if( mask != 0xFFFFFFFF ) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerRA, registerRA, (sint32)mask); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1026,39 +1030,39 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // but only shifts up to register bitwidth minus one are well defined in IML so this requires special handling for shifts >= 32 sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 registerCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg regB = 
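// --- Illustrative reference (not from the patch): rlwinm as emitted above is "rotate left by SH,
// then AND with the MB..ME mask"; the SLWI/SRWI fast paths fall out of this identity. PpcMaskRef is
// a standalone stand-in for ppc_mask, using IBM bit numbering (bit 0 = MSB).
#include <cstdint>

static uint32_t RotL32(uint32_t v, unsigned n) { n &= 31; return n ? (v << n) | (v >> (32 - n)) : v; }

static uint32_t PpcMaskRef(unsigned mb, unsigned me)
{
    uint32_t begin = 0xFFFFFFFFu >> mb;
    uint32_t end   = me < 31 ? (0xFFFFFFFFu >> (me + 1)) : 0;
    uint32_t mask  = begin ^ end;
    return mb <= me ? mask : ~mask; // the mask wraps around when MB > ME
}

static uint32_t RlwinmRef(uint32_t rs, unsigned sh, unsigned mb, unsigned me)
{
    return RotL32(rs, sh) & PpcMaskRef(mb, me);
    // mb == 0 && me == 31 - sh  -> same as rs << sh  (SLWI case above)
    // sh == 32 - mb && me == 31 -> same as rs >> mb  (SRWI case above)
}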
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg regCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); - uint32 registerTmpShiftAmount = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); - uint32 registerTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); - uint32 registerTmp1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); - uint32 registerTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + IMLReg regTmpShiftAmount = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + IMLReg regTmp1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + IMLReg regTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); // load masked shift factor into temporary register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmpShiftAmount, registerRB, 0x3F); - ppcImlGenContext->emitInst().make_compare_s32(registerTmpShiftAmount, 32, registerTmpCondBool, IMLCondition::UNSIGNED_GT); - ppcImlGenContext->emitInst().make_conditional_jump(registerTmpCondBool, true); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmpShiftAmount, regB, 0x3F); + ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 32, regTmpCondBool, IMLCondition::UNSIGNED_GT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, [&](ppcImlGenContext_t& genCtx) { /* branch taken */ - genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, registerTmpShiftAmount); - genCtx.emitInst().make_compare_s32(registerRA, 0, registerCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry + genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, regTmpShiftAmount); + genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry }, [&](ppcImlGenContext_t& genCtx) { /* branch not taken, shift size below 32 */ - genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp1, registerRS, 31); // signMask = input >> 31 (arithmetic shift) - genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerTmp2, 1); // shiftMask = ((1<> 31 (arithmetic shift) + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regTmp2, 1); // shiftMask = ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp, registerRS, 31); // signMask = input >> 31 (arithmetic shift) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp, registerTmp, registerRS); // testValue = input & signMask & ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmp, registerTmp, ((1 << SH) - 1)); - ppcImlGenContext->emitInst().make_compare_s32(registerTmp, 0, registerCarry, IMLCondition::NEQ); // ca = (testValue != 0) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp, regS, 31); // signMask = input >> 31 (arithmetic shift) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp, regTmp, regS); // testValue = 
input & signMask & ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, ((1 << SH) - 1)); + ppcImlGenContext->emitInst().make_compare_s32(regTmp, 0, regCarry, IMLCondition::NEQ); // ca = (testValue != 0) // do the actual shift - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, (sint32)SH); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, (sint32)SH); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1093,12 +1097,12 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - IMLReg registerRS = _GetRegGPR(ppcImlGenContext, rS); - IMLReg registerRB = _GetRegGPR(ppcImlGenContext, rB); - IMLReg registerRA = _GetRegGPR(ppcImlGenContext, rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, regA, regS, regB); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1208,7 +1212,7 @@ bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint regB = regA; regA = IMLREG_INVALID; } - if(regA != IMLREG_INVALID) + if(regA.IsValid()) PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian); else ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian); @@ -1235,7 +1239,7 @@ bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); if (updateAddrReg) { - if (regD == regA) + if (rD == rA) { // make sure to keep source data intact regD = _GetRegTemporary(ppcImlGenContext, 0); @@ -1270,7 +1274,7 @@ bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uin regB = regA; regA = IMLREG_INVALID; } - if (regA == IMLREG_INVALID) + if (regA.IsInvalid()) ppcImlGenContext->emitInst().make_memory_r(regSrc, regB, 0, bitWidth, isBigEndian); else PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, regSrc, regA, regB, bitWidth, false, isBigEndian); @@ -1405,7 +1409,7 @@ bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); // calculate EA - if (regA != IMLREG_INVALID) + if (regA.IsValid()) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); else ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); @@ -1426,7 +1430,7 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg regTmpCompareBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); // calculate EA IMLReg regCalcEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); - if (regA != IMLREG_INVALID) + if (regA.IsValid()) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regCalcEA, regA, regB); else 
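// --- Illustrative reference (not from the patch): the carry rule spelled out in the sraw/srawi
// comments above. CA is set when a negative source loses 1-bits to the shift; architecturally, a
// shift amount with bit 0x20 set produces only sign bits, which is the case the generator splits
// into its own branch path.
#include <cstdint>

struct ShiftResult { uint32_t value; bool carry; };

static ShiftResult SrawiRef(uint32_t rs, unsigned sh) // 0 <= sh <= 31
{
    uint32_t signMask  = static_cast<uint32_t>(static_cast<int32_t>(rs) >> 31); // all ones if negative
    uint32_t testValue = rs & signMask & ((1u << sh) - 1u);                     // 1-bits shifted out of a negative value
    uint32_t result    = static_cast<uint32_t>(static_cast<int32_t>(rs) >> sh);
    return { result, testValue != 0 };
}

static ShiftResult SrawRef(uint32_t rs, uint32_t rb)
{
    unsigned amount = rb & 0x3F;                    // 6-bit shift amount
    if (amount >= 32)
    {
        bool negative = (rs & 0x80000000u) != 0;    // every bit is shifted out
        return { negative ? 0xFFFFFFFFu : 0u, negative && rs != 0 };
    }
    return SrawiRef(rs, amount);
}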
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB); @@ -1466,7 +1470,7 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required // Most sources state that it is cleared on successful store. They don't explicitly mention what happens on failure // "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared - // There may also be different behavior between individual PPC generations + // There may also be different behavior between individual PPC architectures ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0); @@ -1479,8 +1483,8 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod rA = (opcode>>16)&0x1F; rB = (opcode>>11)&0x1F; // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID; + IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // store if( rA != 0 ) ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB); @@ -1496,7 +1500,7 @@ bool PPCRecompilerImlGen_OR_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opc IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); - if(regS == regB) // check for MR mnemonic + if(rS == rB) // check for MR mnemonic ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS); else ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regB); @@ -1515,7 +1519,7 @@ bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); - sint32 regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regTmp); if (opcode & PPC_OPC_RC) @@ -1549,7 +1553,7 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); - sint32 regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regTmp); if (opcode & PPC_OPC_RC) @@ -1717,7 +1721,7 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_HLE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { uint32 hleFuncId = opcode&0xFFFF; - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 
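// --- Illustrative single-core sketch (not from the patch) of the lwarx/stwcx. pairing handled above.
// reservedEA/reservedVal mirror the PPCREC_NAME_CPU_MEMRES_EA/_VAL registers; since the comments above
// note that the exact reservation-clearing behaviour is not fully documented, this sketch simply drops
// the reservation on every stwcx. attempt. All names are illustrative.
#include <cstdint>
#include <unordered_map>

struct ReservationModel
{
    std::unordered_map<uint32_t, uint32_t>& memory; // word-granular guest memory stand-in
    uint32_t reservedEA = 0;
    uint32_t reservedVal = 0;
    bool hasReservation = false;

    uint32_t lwarx(uint32_t ea)
    {
        reservedEA = ea;
        reservedVal = memory[ea];
        hasReservation = true;
        return reservedVal;
    }

    bool stwcx(uint32_t ea, uint32_t value) // returns the CR0.EQ "store succeeded" bit
    {
        bool success = hasReservation && ea == reservedEA && memory[ea] == reservedVal;
        if (success)
            memory[ea] = value;
        hasReservation = false; // assumption: reservation dropped on every attempt
        return success;
    }
};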
hleFuncId, 0, IMLREG_INVALID); return true; } @@ -2931,7 +2935,7 @@ void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, P IMLSegment* exitSegment = ppcImlGenContext.NewSegment(); splitSeg->SetLinkBranchTaken(exitSegment); - exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0); + exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0, IMLREG_INVALID); } void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index f2f5f0d3c..ffee73eac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -6,7 +6,7 @@ IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) +void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) { // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -20,7 +20,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) +void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) { // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -35,7 +35,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenCo imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) +void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) { // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -49,7 +49,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) +void 
PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) { // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -64,7 +64,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenCo imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER) +void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER) { // fpr OP fpr IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -74,7 +74,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcI imlInstruction->op_fpr_r_r.regA = registerOperand; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperand1, uint8 registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) +void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) { // fpr = OP (fpr,fpr) IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -85,7 +85,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* pp imlInstruction->op_fpr_r_r_r.regB = registerOperand2; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperandA, uint8 registerOperandB, uint8 registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) +void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) { // fpr = OP (fpr,fpr,fpr) IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -97,7 +97,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* imlInstruction->op_fpr_r_r_r_r.regC = registerOperandC; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister) +void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, IMLReg registerResult) { // OP (fpr) if(imlInstruction == NULL) @@ -110,7 +110,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcIml /* * Rounds the bottom double to single precision (if single precision accuracy is emulated) */ -void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, uint32 fprRegister, bool flushDenormals=false) +void 
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) { PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL, PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); if( flushDenormals ) @@ -120,7 +120,7 @@ void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext /* * Rounds pair of doubles to single precision (if single precision accuracy is emulated) */ -void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, uint32 fprRegister, bool flushDenormals=false) +void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) { PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL, PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, fprRegister); if( flushDenormals ) @@ -133,9 +133,9 @@ bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) { PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); @@ -153,11 +153,11 @@ bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) { PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); @@ -179,10 +179,10 @@ bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) { 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); @@ -204,12 +204,12 @@ bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // add rB to rA (if rA != 0) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) { PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); @@ -231,9 +231,9 @@ bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode assert_dbg(); } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); return true; } @@ -248,11 +248,11 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod assert_dbg(); } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // emit load iml PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); return true; @@ -268,10 +268,10 @@ bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + 
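// --- Illustrative reference (not from the patch): the single-precision load modes selected above.
// The 32-bit value is byte-swapped from guest (big-endian) order, widened to double and, for the
// SINGLE_INTO_PS0_PS1 mode, replicated into both paired-single slots; the non-paired mode presumably
// writes ps0 only. Assumes a little-endian host; names are illustrative.
#include <cstdint>
#include <cstring>

struct PairedSingleRef { double ps0, ps1; };

static PairedSingleRef LoadSingleRef(const uint8_t* guestMem, bool intoBothSlots)
{
    uint32_t raw;
    std::memcpy(&raw, guestMem, sizeof(raw));
    raw = (raw >> 24) | ((raw >> 8) & 0x0000FF00u) | ((raw << 8) & 0x00FF0000u) | (raw << 24);
    float f;
    std::memcpy(&f, &raw, sizeof(f));
    double d = static_cast<double>(f);
    return { d, intoBothSlots ? d : 0.0 }; // 0.0 is a placeholder; the real PS0-only mode leaves ps1 untouched
}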
IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); return true; } @@ -285,13 +285,11 @@ bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco debugBreakpoint(); return false; } - // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // add rB to rA (if rA != 0) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); return true; } @@ -301,10 +299,8 @@ bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rA, frD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); return true; @@ -316,11 +312,11 @@ bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); return true; @@ -336,10 +332,10 @@ bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); + IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( ppcImlGenContext->LSQE ) { PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); @@ -362,10 +358,10 @@ bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); + IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); // calculate EA in rA ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); @@ -384,9 +380,9 @@ bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); return true; } @@ -402,11 +398,11 @@ bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // add imm to memory register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); return true; @@ -422,10 +418,10 @@ bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); // get fpr register index - uint32 
fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); + IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( ppcImlGenContext->LSQE ) { PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); @@ -442,8 +438,8 @@ bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); // get memory gpr registers - uint32 gprRegister1; - uint32 gprRegister2; + IMLReg gprRegister1; + IMLReg gprRegister2; if( rA != 0 ) { gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); @@ -453,10 +449,10 @@ bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { // rA is not used gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - gprRegister2 = 0; + gprRegister2 = IMLREG_INVALID; } // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); + IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( rA != 0 ) PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); else @@ -471,9 +467,9 @@ bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_ASSERT(frC==0); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); return true; @@ -486,9 +482,9 @@ bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_ASSERT(frC==0); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // subtract bottom double of frB from bottom double of frD PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); return true; @@ -506,9 +502,9 @@ bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod frC = temp; } // load registers - uint32 fprRegisterA = 
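// --- Illustrative reference (not from the patch): PPCREC_FPR_ST_MODE_UI32_FROM_PS0 used by the
// stfiwx path above stores the low 32 bits of the raw FPR bit image, with no float conversion.
#include <cstdint>
#include <cstring>

static uint32_t StfiwxWordRef(double frS)
{
    uint64_t bits;
    std::memcpy(&bits, &frS, sizeof(bits));
    return static_cast<uint32_t>(bits); // low word of the double's bit pattern
}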
PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // move frA to frD (if different register) if( fprRegisterD != fprRegisterA ) PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 @@ -523,13 +519,13 @@ bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); if( frB == frD && frA != frB ) { - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); // move frA to temporary register PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); // divide bottom double of temporary register by bottom double of frB @@ -551,14 +547,14 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); // move frA to temporary register 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); // multiply bottom double of temporary register with bottom double of frC @@ -571,7 +567,7 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( fprRegisterD == fprRegisterC ) { // swap frA and frC - sint32 temp = fprRegisterA; + IMLReg temp = fprRegisterA; fprRegisterA = fprRegisterC; fprRegisterC = temp; } @@ -590,10 +586,10 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { @@ -604,7 +600,7 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( fprRegisterD == fprRegisterC ) { // swap frA and frC - sint32 temp = fprRegisterA; + IMLReg temp = fprRegisterA; fprRegisterA = fprRegisterC; fprRegisterC = temp; } @@ -624,15 +620,15 @@ bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { // hCPU->fpr[frD].fpr = -(hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr - hCPU->fpr[frD].fpr); - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); //// negate frB/frD //PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD, true); // move frA to temporary register @@ -651,7 +647,7 @@ bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* 
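// --- Illustrative sketch (not from the patch) of why FMADD needs the temporary / operand swap above:
// the generator evaluates d = a * c + b in place as "d = a; d *= c; d += b;", which is only safe when
// the destination register does not alias b (temporary case) or c (swap case). Names are illustrative.
#include <cassert>

static void FmaddInPlaceRef(double& d, const double& a, const double& c, const double& b)
{
    assert(&d != &b && &d != &c); // otherwise a temporary or an a/c swap is required, as above
    d = a;
    d *= c;
    d += b;
}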
ppcImlGenContext, uint32 opc if( fprRegisterD == fprRegisterC ) { // swap frA and frC - sint32 temp = fprRegisterA; + IMLReg temp = fprRegisterA; fprRegisterA = fprRegisterC; fprRegisterC = temp; } @@ -680,9 +676,9 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frC = temp; } // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // move frA to frD (if different register) if( fprRegisterD != fprRegisterA ) PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 @@ -709,13 +705,13 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( hCPU->PSE ) hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;*/ // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( frB == frD && frA != frB ) { - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); // move frA to temporary register PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); // divide bottom double of temporary register by bottom double of frB @@ -759,9 +755,9 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frB = temp; } // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // move frA to frD (if different register) if( fprRegisterD != fprRegisterA ) PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 @@ -784,9 +780,9 @@ bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* 
ppcImlGenContext, uint32 opco PPC_ASSERT(frB==0); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // subtract bottom PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); // adjust accuracy @@ -808,11 +804,11 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //if( hCPU->PSE ) // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - uint32 fprRegisterTemp; + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) fprRegisterTemp = fprRegisterD; @@ -842,11 +838,11 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //if( hCPU->PSE ) // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - uint32 fprRegisterTemp; + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) fprRegisterTemp = fprRegisterD; @@ -879,11 +875,11 @@ bool 
PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - uint32 fprRegisterTemp; + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) fprRegisterTemp = fprRegisterD; @@ -965,8 +961,8 @@ bool PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 frD, rA, frB; PPC_OPC_TEMPL_X(opcode, frD, rA, frB); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); return true; } @@ -977,8 +973,8 @@ bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // move frB to frD (if different register) if( fprRegisterD != fprRegisterB ) PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); @@ -993,8 +989,8 @@ bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // move frB to frD (if different register) if( fprRegisterD != fprRegisterB ) 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); @@ -1009,8 +1005,8 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); @@ -1022,8 +1018,8 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( fprRegisterD != fprRegisterB ) { PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); @@ -1046,8 +1042,8 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // move frB to frD (if different register) if( fprRegisterD != fprRegisterB ) PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); @@ -1064,10 +1060,10 @@ bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { return false; } - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SELECT_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); return true; } @@ -1077,8 +1073,8 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); // hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, fprRegisterD, fprRegisterB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); @@ -1089,8 +1085,8 @@ bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, fprRegisterD, fprRegisterB); return true; } @@ -1108,12 +1104,9 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool readPS1 = (opcode & 0x8000) == false; - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // psq load PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, readPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); return true; @@ -1134,14 +1127,12 @@ bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc bool readPS1 = (opcode & 0x8000) == false; - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - // add imm to memory register + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); + + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired load PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, readPS1 ? PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); return true; @@ -1159,12 +1150,9 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc bool storePS1 = (opcode & 0x8000) == false; - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired store PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); return true; @@ -1185,14 +1173,11 @@ bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 op bool storePS1 = (opcode & 0x8000) == false; - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - // add imm to memory register + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); + + IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired store PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); return true; @@ -1205,11 +1190,11 @@ bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); // if frD == frA we can multiply frD immediately and safe a copy instruction if( frD == frA ) @@ -1235,11 +1220,11 @@ bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to 
store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); // if frD == frA we can multiply frD immediately and safe a copy instruction if( frD == frA ) @@ -1268,12 +1253,12 @@ bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp1); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction if( frD == frA && frD != frB ) @@ -1306,12 +1291,12 @@ bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp0); //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to store frC.fp1 in bottom and top half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = 
PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction if( frD == frA && frD != frB ) @@ -1343,9 +1328,9 @@ bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 + hCPU->fpr[frB].fp1; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( frD == frA ) { PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); @@ -1373,9 +1358,9 @@ bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 - hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 - hCPU->fpr[frB].fp1; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterA, fprRegisterB); // adjust accuracy PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); @@ -1389,11 +1374,11 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frA = (opcode >> 16) & 0x1F; frD = (opcode >> 21) & 0x1F; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // we need a temporary register - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); + IMLReg fprRegisterTemp = 
PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA we can multiply frD immediately and safe a copy instruction @@ -1422,9 +1407,9 @@ bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 / hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 / hCPU->fpr[frB].fp1; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA we can divide frD immediately and safe a copy instruction if (frD == frA) @@ -1434,7 +1419,7 @@ bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opc else { // we need a temporary register - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterA); // we divide temporary by frB PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_PAIR, fprRegisterTemp, fprRegisterB); @@ -1457,12 +1442,12 @@ bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 op //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); 
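For readers unfamiliar with paired singles, a minimal standalone sketch of what the hunks above are generating IML for: PS_MULS0 multiplies both lanes of frA by the bottom lane of frC, so frC.ps0 is first broadcast into both halves of a scratch value, which also lets the result be written even when frD aliases an input. The PairedSingle struct and the function name below are placeholders for illustration, not code from the patch.

    #include <cstdio>

    struct PairedSingle { double ps0, ps1; }; // assumed stand-in for a Gekko paired-single register

    // Mirrors the IML sequence: copy frC.ps0 into bottom and top of a temporary, then multiply the pair.
    static PairedSingle ps_muls0(const PairedSingle& frA, const PairedSingle& frC)
    {
        PairedSingle temp{ frC.ps0, frC.ps0 };             // broadcast frC.ps0 into ps0 and ps1
        return { frA.ps0 * temp.ps0, frA.ps1 * temp.ps1 }; // frD.ps0 = frA.ps0*frC.ps0, frD.ps1 = frA.ps1*frC.ps0
    }

    int main()
    {
        PairedSingle r = ps_muls0({ 1.5, 2.0 }, { 4.0, 8.0 });
        std::printf("%f %f\n", r.ps0, r.ps1); // prints 6.000000 8.000000
    }

The same broadcast-through-a-temporary pattern explains the PS_MULS1, PS_MADDS0/1 and PS_MUL hunks; only the source lane and the optional add of frB differ.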
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction @@ -1495,12 +1480,12 @@ bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 o frD = (opcode>>21)&0x1F; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction @@ -1539,12 +1524,12 @@ bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 op //hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 - hCPU->fpr[frB].fp1); // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA and frD != frB we 
can multiply frD immediately and safe a copy instruction @@ -1577,12 +1562,12 @@ bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 o frD = (opcode>>21)&0x1F; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction @@ -1620,10 +1605,10 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op //hCPU->fpr[frD].fp0 = s0; //hCPU->fpr[frD].fp1 = s1; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUM0, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); // adjust accuracy PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); @@ -1642,10 +1627,10 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op //hCPU->fpr[frD].fp0 = s0; //hCPU->fpr[frD].fp1 = s1; // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = 
PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUM1, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); // adjust accuracy PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); @@ -1660,8 +1645,8 @@ bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = -hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = -hCPU->fpr[frB].fp1; // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterB); return true; } @@ -1672,8 +1657,8 @@ bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ABS_PAIR, fprRegisterD, fprRegisterB); return true; } @@ -1687,8 +1672,8 @@ bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)hCPU->fpr[frB].fp1); // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FRES_PAIR, fprRegisterD, fprRegisterB); return true; @@ -1703,8 +1688,8 @@ bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)sqrt(hCPU->fpr[frB].fp1)); // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FRSQRTE_PAIR, fprRegisterD, fprRegisterB); return true; } @@ -1719,8 +1704,8 @@ bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // load registers if( frB != frD ) { - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterB); } return true; @@ -1734,10 +1719,10 @@ bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SELECT_PAIR, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); return true; } @@ -1752,10 +1737,10 @@ bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 //float s1 = (float)hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp0 = s0; //hCPU->fpr[frD].fp1 = s1; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // unpcklpd + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + if( frA == frB ) { // simply duplicate bottom into bottom and top of destination register @@ -1779,9 +1764,9 @@ bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 frD = (opcode>>21)&0x1F; // hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0; // hCPU->fpr[frD].fp1 = hCPU->fpr[frB].fp1; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, 
PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( fprRegisterD != fprRegisterB ) PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterB); @@ -1798,9 +1783,9 @@ bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( frA == frB ) { // swap bottom and top @@ -1836,9 +1821,9 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( fprRegisterA == fprRegisterB ) { PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); @@ -1871,8 +1856,8 @@ bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; crfD = (opcode>>23)&0x7; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; } @@ -1886,8 +1871,8 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; } @@ -1901,8 +1886,8 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_TOP, fprRegisterA, fprRegisterB, crfD); return true; } \ No newline at end of file From 7c767383b4fbfa816bd827b66ef5e5c48594afee Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 4 Feb 2023 17:54:46 +0100 Subject: [PATCH 37/64] PPCRec: Partial support for typed registers in RA --- .../Espresso/Recompiler/IML/IMLInstruction.h | 6 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 348 ++++++++++-------- .../Recompiler/IML/IMLRegisterAllocator.h | 7 +- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 16 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 27 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 18 +- 6 files changed, 234 insertions(+), 188 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 9dec696d3..e4e6252f7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -17,7 +17,8 @@ enum class IMLRegFormat : uint8 I8, // I1 ? F64, - F32 + F32, + TYPE_COUNT, }; class IMLReg @@ -86,10 +87,9 @@ class IMLReg return IsValid() && GetRegID() == regId; } - // risky + // compare all fields bool operator==(const IMLReg& other) const { - //__debugbreak(); return m_raw == other.m_raw; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 351306ec5..e540518e6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -9,9 +9,45 @@ #include +struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment +{ + IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {}; + + void TrackInstruction(sint32 index) + { + usageStart = std::min(usageStart, index); + usageEnd = std::max(usageEnd, index + 1); // exclusive index + } + + sint32 usageStart; + sint32 usageEnd; + bool isProcessed{false}; + IMLRegFormat regBaseFormat; +}; + struct IMLRegisterAllocatorContext { IMLRegisterAllocatorParameters* raParam; + ppcImlGenContext_t* deprGenContext; // deprecated. 
Try to decouple IMLRA from other parts of IML/PPCRec
+
+	std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat; // a vector would be more efficient but it also means that reg ids have to be continuous and not completely arbitrary
+	// first pass
+	std::vector<std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>> perSegmentAbstractRanges;
+	// second pass
+
+	// helper methods
+	inline std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>& GetSegmentAbstractRangeMap(IMLSegment* imlSegment)
+	{
+		return perSegmentAbstractRanges[imlSegment->momentaryIndex];
+	}
+
+	inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const
+	{
+		auto it = regIdToBaseFormat.find(regId);
+		cemu_assert_debug(it != regIdToBaseFormat.cend());
+		return it->second;
+	}
+
 };
 
 uint32 recRACurrentIterationIndex = 0;
@@ -455,7 +491,9 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
 			continue;
 		}
 		// find free register for current subrangeItr and segment
-		IMLPhysRegisterSet physRegSet = ctx.raParam->physicalRegisterPool;
+		IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->range->virtualRegister);
+		IMLPhysRegisterSet physRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat);
+		cemu_assert_debug(physRegSet.HasAnyAvailable()); // register uses type with no valid pool
 		for (auto& liverangeItr : livenessTimeline.activeRanges)
 		{
 			cemu_assert_debug(liverangeItr->range->physicalRegister >= 0);
@@ -976,10 +1014,6 @@ void IMLRA_GenerateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext)
 	}
 }
 
-void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext);
-void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext);
-void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext);
-
 void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
 {
 	// insert empty segments after every non-taken branch if the linked segment has more than one input
@@ -1030,97 +1064,100 @@ void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
 	}
 }
 
-void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
-{
-	IMLRegisterAllocatorContext ctx;
-	ctx.raParam = &raParam;
-
-	IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext);
-
-	ppcImlGenContext->raInfo.list_ranges = std::vector<raLivenessRange_t*>();
-
-	IMLRA_CalculateLivenessRanges(ppcImlGenContext);
-	IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext);
-	IMLRA_AssignRegisters(ctx, ppcImlGenContext);
-
-	IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext);
-	IMLRA_GenerateMoveInstructions(ppcImlGenContext);
-
-	PPCRecRA_deleteAllRanges(ppcImlGenContext);
-}
-
-
-bool _isRangeDefined(IMLSegment* imlSegment, sint32 vGPR)
+IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
 {
-	return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX);
+	auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
+	auto it = segMap.find(regId);
+	return it != segMap.end() ?
&it->second : nullptr; } -void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +// scan instructions and establish register usage range for segment +void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) - { - imlSegment->raDistances.reg[i].usageStart = INT_MAX; - imlSegment->raDistances.reg[i].usageEnd = INT_MIN; - } - // scan instructions for usage range - size_t index = 0; + size_t instructionIndex = 0; IMLUsedRegisters gprTracking; - while (index < imlSegment->imlList.size()) + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + while (instructionIndex < imlSegment->imlList.size()) { - imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); + imlSegment->imlList[instructionIndex].CheckRegisterUsage(&gprTracking); gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { IMLRegID gprId = gprReg.GetRegID(); - cemu_assert_debug(gprId < IML_RA_VIRT_REG_COUNT_MAX); - imlSegment->raDistances.reg[gprId].usageStart = std::min(imlSegment->raDistances.reg[gprId].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[gprId].usageEnd = std::max(imlSegment->raDistances.reg[gprId].usageEnd, index + 1); // index after instruction + auto it = segDistMap.find(gprId); + if (it == segDistMap.end()) + { + segDistMap.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1); + ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat()); + } + else + { + it->second.TrackInstruction(instructionIndex); +#ifdef CEMU_DEBUG_ASSERT + cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same +#endif + } }); - index++; + instructionIndex++; } } -void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) { // for each register calculate min/max index of usage range within each segment - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + size_t dbgIndex = 0; + for (IMLSegment* segIt : ctx.deprGenContext->segmentList2) { - PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, segIt); + cemu_assert_debug(segIt->momentaryIndex == dbgIndex); + IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segIt); + dbgIndex++; } } -raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) +raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) { - if (imlSegment->raDistances.isProcessed[vGPR]) + IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR); + if (!abstractRange) + return nullptr; + if (abstractRange->isProcessed) { // return already existing segment return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; } - imlSegment->raDistances.isProcessed[vGPR] = true; - if (_isRangeDefined(imlSegment, vGPR) == false) - return nullptr; + abstractRange->isProcessed = true; // create subrange cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); - raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd); + raLivenessSubrange_t* subrange = 
PPCRecRA_createSubrange(ctx.deprGenContext, range, imlSegment, abstractRange->usageStart, abstractRange->usageEnd); // traverse forward - if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + if (abstractRange->usageEnd == RA_INTER_RANGE_END) { - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + if (imlSegment->nextSegmentBranchTaken) { - subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); + IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR); + if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); + } } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + if (imlSegment->nextSegmentBranchNotTaken) { - subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); + IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR); + if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); + } } } // traverse backward - if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + if (abstractRange->usageStart == RA_INTER_RANGE_START) { for (auto& it : imlSegment->list_prevSegments) { - if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); + IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR); + if(!prevRange) + continue; + if (prevRange->usageEnd == RA_INTER_RANGE_END) + PPCRecRA_convertToMappedRanges(ctx, it, vGPR, range); } } // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction @@ -1135,17 +1172,19 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG return subrange; } -void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +// take abstract range data and create LivenessRanges +void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) { - if (_isRangeDefined(imlSegment, i) == false) - continue; - if (imlSegment->raDistances.isProcessed[i]) + if(it.second.isProcessed) continue; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]); - PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); + IMLRegID regId = it.first; + raLivenessRange_t* range = 
PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.deprGenContext->mappedRegister[regId]); + PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range); } + // create lookup table of ranges raLivenessSubrange_t* vGPR2Subrange[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) @@ -1168,7 +1207,10 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT if ((sint32)index < vGPR2Subrange[gprId]->start.index) + { + IMLRARegAbstractLiveness* dbgAbstractRange = _GetAbstractRange(ctx, imlSegment, gprId); assert_dbg(); + } if ((sint32)index + 1 > vGPR2Subrange[gprId]->end.index) assert_dbg(); #endif @@ -1177,57 +1219,63 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, } } -void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR) +void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) { - if (_isRangeDefined(imlSegment, vGPR) == false) + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segDistMap.find(regId); + if (it == segDistMap.end()) { + sint32 startIndex; if(imlSegment->HasSuffixInstruction()) - imlSegment->raDistances.reg[vGPR].usageStart = imlSegment->GetSuffixInstructionIndex(); + startIndex = imlSegment->GetSuffixInstructionIndex(); else - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; - return; + startIndex = RA_INTER_RANGE_END; + segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, startIndex, RA_INTER_RANGE_END); + } + else + { + it->second.usageEnd = RA_INTER_RANGE_END; } - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; } -void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR) +void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) { - if (_isRangeDefined(imlSegment, vGPR) == false) + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segDistMap.find(regId); + if (it == segDistMap.end()) { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START; + segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START); } else { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; + it->second.usageStart = RA_INTER_RANGE_START; } // propagate backwards for (auto& it : imlSegment->list_prevSegments) { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR); + IMLRA_extendAbstractRangeToEndOfSegment(ctx, it, regId); } } -void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, IMLSegment** route, sint32 routeDepth) +void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth) { #ifdef CEMU_DEBUG_ASSERT if (routeDepth < 2) assert_dbg(); #endif // extend starting range to end of segment - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR); + IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId); // extend all the connecting segments in both directions for (sint32 i = 1; i < (routeDepth - 1); i++) { - 
PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR); - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR); + IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[i], regId); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[i], regId); } // extend the final segment towards the beginning - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth - 1], vGPR); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[routeDepth - 1], regId); } -void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* currentSegment, sint32 vGPR, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) +void _PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) { if (routeDepth >= 64) { @@ -1235,53 +1283,47 @@ void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLS return; } route[routeDepth] = currentSegment; - if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) + + IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID); + + if (!range) { - // measure distance to end of segment + // measure distance over entire segment distanceLeft -= (sint32)currentSegment->imlList.size(); if (distanceLeft > 0) { if (currentSegment->nextSegmentBranchNotTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1); + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1); if (currentSegment->nextSegmentBranchTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1); + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1); } return; } else { // measure distance to range - if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) + if (range->usageStart == RA_INTER_RANGE_END) { if (distanceLeft < (sint32)currentSegment->imlList.size()) return; // range too far away } - else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) + else if (range->usageStart != RA_INTER_RANGE_START && range->usageStart > distanceLeft) return; // out of range // found close range -> connect ranges - _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1); + IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1); } } -void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* currentSegment, sint32 vGPR) +void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID) { -#ifdef CEMU_DEBUG_ASSERT - if (currentSegment->raDistances.reg[vGPR].usageEnd < 0) - assert_dbg(); -#endif + cemu_assert_debug(range->usageEnd >= 0); // count instructions to end of initial segment - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START) - assert_dbg(); sint32 instructionsUntilEndOfSeg; - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + if (range->usageEnd == RA_INTER_RANGE_END) instructionsUntilEndOfSeg = 0; else - instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - 
currentSegment->raDistances.reg[vGPR].usageEnd; - -#ifdef CEMU_DEBUG_ASSERT - if (instructionsUntilEndOfSeg < 0) - assert_dbg(); -#endif + instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - range->usageEnd; + cemu_assert_debug(instructionsUntilEndOfSeg >= 0); sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; if (remainingScanDist <= 0) return; // can't reach end @@ -1289,23 +1331,17 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe IMLSegment* route[64]; route[0] = currentSegment; if (currentSegment->nextSegmentBranchNotTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1); - } + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1); if (currentSegment->nextSegmentBranchTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1); - } + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1); } -void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) { - if (imlSegment->raDistances.reg[i].usageStart == INT_MAX) - continue; // not used - // check and extend if possible - PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i); + PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &(it.second), it.first); } #ifdef CEMU_DEBUG_ASSERT if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) @@ -1315,7 +1351,7 @@ void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, #endif } -void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { std::vector list_segments; list_segments.reserve(1000); @@ -1325,7 +1361,7 @@ void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, IM while (index < list_segments.size()) { IMLSegment* currentSegment = list_segments[index]; - PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); + PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment); // follow flow if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) { @@ -1341,25 +1377,24 @@ void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, IM } } -void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_mergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx) { - for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; if (imlSegment->list_prevSegments.empty()) { - if (imlSegment->raRangeExtendProcessed) - assert_dbg(); // should not happen - PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment); + 
cemu_assert_debug(!imlSegment->raRangeExtendProcessed); // should not be processed yet + PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment); } } } -void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_extendAbstracRangesOutOfLoops(IMLRegisterAllocatorContext& ctx) { - for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; auto localLoopDepth = imlSegment->loopDepth; if (localLoopDepth <= 0) continue; // not inside a loop @@ -1376,31 +1411,28 @@ void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) if (hasLoopExit == false) continue; - // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) { - if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) - continue; // range not set or does not reach end of segment + if(it.second.usageEnd != RA_INTER_RANGE_END) + continue; if (imlSegment->nextSegmentBranchTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, it.first); if (imlSegment->nextSegmentBranchNotTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchNotTaken, it.first); } } } -void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) { - // merge close ranges - PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); + IMLRA_mergeCloseAbstractRanges(ctx); // extra pass to move register stores out of loops - PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext); + IMLRA_extendAbstracRangesOutOfLoops(ctx); // calculate liveness ranges - for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) - { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; - PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); - } + for (auto& segIt : ctx.deprGenContext->segmentList2) + IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt); } void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) @@ -1447,4 +1479,28 @@ void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) _analyzeRangeDataFlow(subrange); } } -} \ No newline at end of file +} + +void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam) +{ + IMLRegisterAllocatorContext ctx; + ctx.raParam = &raParam; + ctx.deprGenContext = ppcImlGenContext; + + IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); + + ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment + + ppcImlGenContext->raInfo.list_ranges = std::vector(); + + ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); + + IMLRA_CalculateLivenessRanges(ctx); + 
IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx); + IMLRA_AssignRegisters(ctx, ppcImlGenContext); + + IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); + IMLRA_GenerateMoveInstructions(ppcImlGenContext); + + PPCRecRA_deleteAllRanges(ppcImlGenContext); +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index 87e36b00c..5e0d0f044 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -88,7 +88,12 @@ class IMLPhysRegisterSet struct IMLRegisterAllocatorParameters { - IMLPhysRegisterSet physicalRegisterPool; + inline IMLPhysRegisterSet& GetPhysRegPool(IMLRegFormat regFormat) + { + return perTypePhysPool[stdx::to_underlying(regFormat)]; + } + + IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];// physicalRegisterPool; }; void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 8ef0669e4..4e90d5299 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -54,9 +54,9 @@ struct raLivenessSubrange_t struct raLivenessRange_t { - sint32 virtualRegister; + IMLRegID virtualRegister; sint32 physicalRegister; - sint32 name; + IMLName name; std::vector list_subranges; }; @@ -70,16 +70,6 @@ struct PPCSegmentRegisterAllocatorInfo_t raLivenessSubrange_t* linkedList_perVirtualGPR[IML_RA_VIRT_REG_COUNT_MAX]{}; }; -struct PPCRecVGPRDistances_t -{ - struct _RegArrayEntry - { - sint32 usageStart{}; - sint32 usageEnd{}; - }reg[IML_RA_VIRT_REG_COUNT_MAX]; - bool isProcessed[IML_RA_VIRT_REG_COUNT_MAX]{}; -}; - struct IMLSegment { sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) @@ -113,7 +103,7 @@ struct IMLSegment uint32 crBitsWritten{}; // bits that are written in this segment // register allocator info PPCSegmentRegisterAllocatorInfo_t raInfo{}; - PPCRecVGPRDistances_t raDistances{}; + //PPCRecVGPRDistances_t raDistances{}; bool raRangeExtendProcessed{}; // segment state API diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 7ca247ba6..b040275e3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -218,7 +218,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // collect list of PPC-->x64 entry points cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size()); - cemu_assert_debug(ppcImlGenContext.imlListCount == 0); entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) @@ -295,18 +294,20 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) } IMLRegisterAllocatorParameters raParam; - raParam.physicalRegisterPool.SetAvailable(X86_REG_RAX); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RDX); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RBX); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RBP); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RSI); - 
raParam.physicalRegisterPool.SetAvailable(X86_REG_RDI); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R8); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R9); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R10); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R11); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R12); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RCX); + + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); + gprPhysPool.SetAvailable(X86_REG_RAX); + gprPhysPool.SetAvailable(X86_REG_RDX); + gprPhysPool.SetAvailable(X86_REG_RBX); + gprPhysPool.SetAvailable(X86_REG_RBP); + gprPhysPool.SetAvailable(X86_REG_RSI); + gprPhysPool.SetAvailable(X86_REG_RDI); + gprPhysPool.SetAvailable(X86_REG_R8); + gprPhysPool.SetAvailable(X86_REG_R9); + gprPhysPool.SetAvailable(X86_REG_R10); + gprPhysPool.SetAvailable(X86_REG_R11); + gprPhysPool.SetAvailable(X86_REG_R12); + gprPhysPool.SetAvailable(X86_REG_RCX); IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index c80fad8d9..7f9817aac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -45,10 +45,6 @@ struct ppcImlGenContext_t uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; // temporary floating point registers (single and double precision) uint32 mappedFPRRegister[256]; - // list of intermediate instructions - IMLInstruction* imlList; - sint32 imlListSize; - sint32 imlListCount; // list of segments std::vector segmentList2; // code generation control @@ -66,16 +62,8 @@ struct ppcImlGenContext_t ~ppcImlGenContext_t() { - if (imlList) - { - free(imlList); - imlList = nullptr; - } - for (IMLSegment* imlSegment : segmentList2) - { delete imlSegment; - } segmentList2.clear(); } @@ -117,6 +105,12 @@ struct ppcImlGenContext_t segmentList2[i] = new IMLSegment(); return { segmentList2.data() + index, count}; } + + void UpdateSegmentIndices() + { + for (size_t i = 0; i < segmentList2.size(); i++) + segmentList2[i]->momentaryIndex = (sint32)i; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); From b1c6646831a504c06562baf3f7541f921f6dd115 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sun, 5 Feb 2023 21:20:26 +0100 Subject: [PATCH 38/64] PPCRec: Further work on support for typed registers in RA Additionally there is no more range limit for virtual RegIDs, making the entire uint16 space available in theory --- .../Espresso/Recompiler/IML/IMLInstruction.h | 18 ++ .../Recompiler/IML/IMLRegisterAllocator.cpp | 280 ++++++------------ .../IML/IMLRegisterAllocatorRanges.cpp | 60 ++-- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 13 +- 4 files changed, 159 insertions(+), 212 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index e4e6252f7..a3f0f652d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -586,6 +586,24 @@ struct IMLInstruction operation = 0; } + void make_r_name(IMLReg regR, IMLName name) + { + cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format + type = PPCREC_IML_TYPE_R_NAME; + operation = PPCREC_IML_OP_ASSIGN; + op_r_name.regR = regR; + op_r_name.name = name; + } + + void make_name_r(IMLName name, IMLReg regR) + { + 
cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format + type = PPCREC_IML_TYPE_NAME_R; + operation = PPCREC_IML_OP_ASSIGN; + op_r_name.regR = regR; + op_r_name.name = name; + } + void make_debugbreak(uint32 currentPPCAddress = 0) { make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index e540518e6..4b6100f72 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -118,107 +118,6 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml } } -//typedef struct -//{ -// sint32 name; -// sint32 virtualRegister; -// sint32 physicalRegister; -// bool isDirty; -//}raRegisterState_t; - -//const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; -// -//raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) -//{ -// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) -// { -// if (regState[i].virtualRegister == virtualRegister) -// { -//#ifdef CEMU_DEBUG_ASSERT -// if (regState[i].physicalRegister < 0) -// assert_dbg(); -//#endif -// return regState + i; -// } -// } -// return nullptr; -//} -// -//raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) -//{ -// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) -// { -// if (regState[i].physicalRegister < 0) -// { -// regState[i].physicalRegister = i; -// return regState + i; -// } -// } -// return nullptr; -//} - -typedef struct -{ - IMLRegID registerIndex; - uint16 registerName; -}raLoadStoreInfo_t; - -IMLReg _MakeNativeGPR(IMLRegID regId) -{ - return IMLReg(IMLRegFormat::I64, IMLRegFormat::I64, 0, regId); -} - -void PPCRecRA_insertGPRLoadInstruction(IMLSegment* imlSegment, sint32 insertIndex, IMLRegID registerIndex, sint32 registerName) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _MakeNativeGPR(registerIndex); - imlInstructionItr->op_r_name.name = registerName; -} - -void PPCRecRA_insertGPRLoadInstructions(IMLSegment* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* loadList, sint32 loadCount) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadCount); - memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(IMLInstruction)*loadCount); - for (sint32 i = 0; i < loadCount; i++) - { - IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i); - imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _MakeNativeGPR(loadList[i].registerIndex); - imlInstructionItr->op_r_name.name = (uint32)loadList[i].registerName; - } -} - -void PPCRecRA_insertGPRStoreInstruction(IMLSegment* imlSegment, sint32 insertIndex, IMLRegID registerIndex, sint32 registerName) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0); - memset(imlInstructionItr, 0x00, 
sizeof(IMLInstruction));
-	imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R;
-	imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
-	imlInstructionItr->op_r_name.regR = _MakeNativeGPR(registerIndex);
-	imlInstructionItr->op_r_name.name = registerName;
-}
-
-void PPCRecRA_insertGPRStoreInstructions(IMLSegment* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* storeList, sint32 storeCount)
-{
-	PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeCount);
-	memset(imlSegment->imlList.data() + (insertIndex + 0), 0x00, sizeof(IMLInstruction)*storeCount);
-	for (sint32 i = 0; i < storeCount; i++)
-	{
-		IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i);
-		memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
-		imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R;
-		imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
-		imlInstructionItr->op_r_name.regR = _MakeNativeGPR(storeList[i].registerIndex);
-		imlInstructionItr->op_r_name.name = (uint32)storeList[i].registerName;
-	}
-}
-
 #define SUBRANGE_LIST_SIZE (128)
 
 sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessSubrange_t* subrange, sint32 startIndex)
@@ -407,10 +306,23 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
 #endif
 }
 
+std::unordered_map<IMLRegID, raLivenessSubrange_t*>& IMLRA_GetSubrangeMap(IMLSegment* imlSegment)
+{
+	return imlSegment->raInfo.linkedList_perVirtualGPR2;
+}
+
+raLivenessSubrange_t* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
+{
+	auto it = imlSegment->raInfo.linkedList_perVirtualGPR2.find(regId);
+	if (it == imlSegment->raInfo.linkedList_perVirtualGPR2.end())
+		return nullptr;
+	return it->second;
+}
+
 raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex)
 {
 	uint32 regId = regToSearch.GetRegID();
-	raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_perVirtualGPR[regId];
+	raLivenessSubrange_t* subrangeItr = IMLRA_GetSubrange(imlSegment, regId);
 	while (subrangeItr)
 	{
 		if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex)
@@ -763,8 +675,10 @@ void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t*
 
 struct subrangeEndingInfo_t
 {
+	//boost::container::small_vector subrangeList2;
 	raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE];
 	sint32 subrangeCount;
+
 	bool hasUndefinedEndings;
 };
 
@@ -866,11 +780,35 @@ void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange)
 	}
 }
 
-void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
+inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId)
+{
+	return IMLReg(baseFormat, baseFormat, 0, regId);
+}
+
+void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span<raLivenessSubrange_t*> loadList)
+{
+	PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadList.size());
+	for (sint32 i = 0; i < loadList.size(); i++)
+	{
+		IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->range->virtualRegister];
+		cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT);
+		imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->range->physicalRegister), loadList[i]->range->name);
+	}
+}
+
+void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span<raLivenessSubrange_t*> storeList)
+{
+	PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeList.size());
+	for (size_t i = 0; i < 
storeList.size(); i++) + { + IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->range->virtualRegister]; + cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); + imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->range->name, _MakeNativeReg(baseFormat, storeList[i]->range->physicalRegister)); + } +} + +void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { - sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX]; - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) - virtualReg2PhysReg[i] = -1; std::unordered_map virtId2PhysRegIdMap; // key = virtual register, value = physical register IMLRALivenessTimeline livenessTimeline; sint32 index = 0; @@ -889,11 +827,9 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML assert_dbg(); } // update translation table - if (virtualReg2PhysReg[subrangeItr->range->virtualRegister] != -1) - assert_dbg(); + cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->range->virtualRegister)); #endif - virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -906,15 +842,12 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML for (auto& expiredRange : livenessTimeline.GetExpiredRanges()) { // update translation table - if (virtualReg2PhysReg[expiredRange->range->virtualRegister] == -1) - assert_dbg(); - virtualReg2PhysReg[expiredRange->range->virtualRegister] = -1; virtId2PhysRegIdMap.erase(expiredRange->range->virtualRegister); // store GPR if required // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (expiredRange->hasStore) { - PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), expiredRange->range->physicalRegister, expiredRange->range->name); + PPCRecRA_insertGPRStoreInstructions(ctx, imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), {&expiredRange, 1}); index++; } } @@ -936,13 +869,11 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML } if (subrangeItr->_noLoad == false) { - PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name); + PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), {&subrangeItr , 1}); index++; subrangeItr->start.index--; } // update translation table - cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); - virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -954,31 +885,22 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML index++; } // expire infinite subranges (subranges which cross the segment border) - sint32 storeLoadListLength = 0; - raLoadStoreInfo_t 
loadStoreList[IML_RA_VIRT_REG_COUNT_MAX]; + std::vector loadStoreList; livenessTimeline.ExpireRanges(RA_INTER_RANGE_END); for (auto liverange : livenessTimeline.GetExpiredRanges()) { // update translation table - cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); - virtualReg2PhysReg[liverange->range->virtualRegister] = -1; virtId2PhysRegIdMap.erase(liverange->range->virtualRegister); // store GPR if (liverange->hasStore) - { - loadStoreList[storeLoadListLength].registerIndex = liverange->range->physicalRegister; - loadStoreList[storeLoadListLength].registerName = liverange->range->name; - storeLoadListLength++; - } + loadStoreList.emplace_back(liverange); } cemu_assert_debug(livenessTimeline.activeRanges.empty()); - if (storeLoadListLength > 0) - { - PPCRecRA_insertGPRStoreInstructions(imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList, storeLoadListLength); - } + if (!loadStoreList.empty()) + PPCRecRA_insertGPRStoreInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList); // load subranges for next segments subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - storeLoadListLength = 0; + loadStoreList.clear(); while(subrangeItr) { if (subrangeItr->start.index == RA_INTER_RANGE_END) @@ -986,31 +908,23 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML livenessTimeline.AddActiveRange(subrangeItr); // load GPR if (subrangeItr->_noLoad == false) - { - loadStoreList[storeLoadListLength].registerIndex = subrangeItr->range->physicalRegister; - loadStoreList[storeLoadListLength].registerName = subrangeItr->range->name; - storeLoadListLength++; - } + loadStoreList.emplace_back(subrangeItr); // update translation table - cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); - virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } - if (storeLoadListLength > 0) - { - PPCRecRA_insertGPRLoadInstructions(imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList, storeLoadListLength); - } + if (!loadStoreList.empty()) + PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList); } -void IMLRA_GenerateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx) { - for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; - IMLRA_GenerateSegmentInstructions(ppcImlGenContext, imlSegment); + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; + IMLRA_GenerateSegmentMoveInstructions(ctx, imlSegment); } } @@ -1120,11 +1034,15 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext if (abstractRange->isProcessed) { // return already existing segment - return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; + raLivenessSubrange_t* existingRange = IMLRA_GetSubrange(imlSegment, vGPR); + cemu_assert_debug(existingRange); + return existingRange; } abstractRange->isProcessed = true; // create 
subrange - cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); +#ifdef CEMU_DEBUG_ASSERT + cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr); +#endif raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, range, imlSegment, abstractRange->usageStart, abstractRange->usageEnd); // traverse forward if (abstractRange->usageEnd == RA_INTER_RANGE_END) @@ -1175,6 +1093,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext // take abstract range data and create LivenessRanges void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { + // convert abstract min-max ranges to liveness range objects auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); for (auto& it : segMap) { @@ -1184,18 +1103,10 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.deprGenContext->mappedRegister[regId]); PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range); } - - // create lookup table of ranges - raLivenessSubrange_t* vGPR2Subrange[IML_RA_VIRT_REG_COUNT_MAX]; - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) - { - vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i]; -#ifdef CEMU_DEBUG_ASSERT - if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr) - assert_dbg(); -#endif - } - // parse instructions and convert to locations + // fill created ranges with read/write location indices + // note that at this point there is only one range per register per segment + // and the algorithm below relies on this + const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); size_t index = 0; IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) @@ -1203,15 +1114,15 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { IMLRegID gprId = gprReg.GetRegID(); - // add location - PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten); + raLivenessSubrange_t* subrange = regToSubrange.find(gprId)->second; + PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT - if ((sint32)index < vGPR2Subrange[gprId]->start.index) + if ((sint32)index < subrange->start.index) { IMLRARegAbstractLiveness* dbgAbstractRange = _GetAbstractRange(ctx, imlSegment, gprId); assert_dbg(); } - if ((sint32)index + 1 > vGPR2Subrange[gprId]->end.index) + if ((sint32)index + 1 > subrange->end.index) assert_dbg(); #endif }); @@ -1275,7 +1186,7 @@ void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regI IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[routeDepth - 1], regId); } -void _PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) +void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) { if (routeDepth >= 64) { @@ -1293,9 +1204,9 @@ void _PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegme if (distanceLeft > 0) { if (currentSegment->nextSegmentBranchNotTaken) - 
_PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1); + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1); if (currentSegment->nextSegmentBranchTaken) - _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1); + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1); } return; } @@ -1331,9 +1242,9 @@ void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegmen IMLSegment* route[64]; route[0] = currentSegment; if (currentSegment->nextSegmentBranchNotTaken) - _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1); + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1); if (currentSegment->nextSegmentBranchTaken) - _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1); + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1); } void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) @@ -1354,23 +1265,30 @@ void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IML void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { std::vector list_segments; - list_segments.reserve(1000); + std::vector list_processedSegment; + size_t segmentCount = ctx.deprGenContext->segmentList2.size(); + list_segments.reserve(segmentCount+1); + list_processedSegment.resize(segmentCount); + + auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {list_processedSegment[seg->momentaryIndex] = true; }; + auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool { return list_processedSegment[seg->momentaryIndex]; }; + markSegProcessed(imlSegment); + sint32 index = 0; - imlSegment->raRangeExtendProcessed = true; list_segments.push_back(imlSegment); while (index < list_segments.size()) { IMLSegment* currentSegment = list_segments[index]; PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment); // follow flow - if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) + if (currentSegment->nextSegmentBranchNotTaken && !isSegProcessed(currentSegment->nextSegmentBranchNotTaken)) { - currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true; + markSegProcessed(currentSegment->nextSegmentBranchNotTaken); list_segments.push_back(currentSegment->nextSegmentBranchNotTaken); } - if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false) + if (currentSegment->nextSegmentBranchTaken && !isSegProcessed(currentSegment->nextSegmentBranchTaken)) { - currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true; + markSegProcessed(currentSegment->nextSegmentBranchTaken); list_segments.push_back(currentSegment->nextSegmentBranchTaken); } index++; @@ -1382,11 +1300,9 @@ void IMLRA_mergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx) for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; - if (imlSegment->list_prevSegments.empty()) - { - 
cemu_assert_debug(!imlSegment->raRangeExtendProcessed); // should not be processed yet - PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment); - } + if (!imlSegment->list_prevSegments.empty()) + continue; // not an entry/standalone segment + PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment); } } @@ -1500,7 +1416,7 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext IMLRA_AssignRegisters(ctx, ppcImlGenContext); IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); - IMLRA_GenerateMoveInstructions(ppcImlGenContext); + IMLRA_GenerateMoveInstructions(ctx); PPCRecRA_deleteAllRanges(ppcImlGenContext); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 071a1d5e8..8cdefe251 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -3,17 +3,24 @@ #include "IMLRegisterAllocatorRanges.h" #include "util/helpers/MemoryPool.h" -void PPCRecRARange_addLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) +void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessSubrange_t* subrange) { -#ifdef CEMU_DEBUG_ASSERT - if ((*root) && (*root)->range->virtualRegister != subrange->range->virtualRegister) - assert_dbg(); -#endif - subrange->link_sameVirtualRegisterGPR.next = *root; - if (*root) - (*root)->link_sameVirtualRegisterGPR.prev = subrange; - subrange->link_sameVirtualRegisterGPR.prev = nullptr; - *root = subrange; + IMLRegID regId = subrange->range->virtualRegister; + auto it = root.find(regId); + if (it == root.end()) + { + // new single element + root.try_emplace(regId, subrange); + subrange->link_sameVirtualRegisterGPR.prev = nullptr; + subrange->link_sameVirtualRegisterGPR.next = nullptr; + } + else + { + // insert in first position + subrange->link_sameVirtualRegisterGPR.next = it->second; + it->second = subrange; + subrange->link_sameVirtualRegisterGPR.prev = subrange; + } } void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) @@ -25,15 +32,28 @@ void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivene *root = subrange; } -void PPCRecRARange_removeLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) +void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessSubrange_t* subrange) { - raLivenessSubrange_t* tempPrev = subrange->link_sameVirtualRegisterGPR.prev; - if (subrange->link_sameVirtualRegisterGPR.prev) - subrange->link_sameVirtualRegisterGPR.prev->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next; - else - (*root) = subrange->link_sameVirtualRegisterGPR.next; - if (subrange->link_sameVirtualRegisterGPR.next) - subrange->link_sameVirtualRegisterGPR.next->link_sameVirtualRegisterGPR.prev = tempPrev; + IMLRegID regId = subrange->range->virtualRegister; + raLivenessSubrange_t* nextRange = subrange->link_sameVirtualRegisterGPR.next; + raLivenessSubrange_t* prevRange = subrange->link_sameVirtualRegisterGPR.prev; + raLivenessSubrange_t* newBase = prevRange ? 
prevRange : nextRange; + if (prevRange) + prevRange->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next; + if (nextRange) + nextRange->link_sameVirtualRegisterGPR.prev = subrange->link_sameVirtualRegisterGPR.prev; + + if (!prevRange) + { + if (nextRange) + { + root.find(regId)->second = nextRange; + } + else + { + root.erase(regId); + } + } #ifdef CEMU_DEBUG_ASSERT subrange->link_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1; subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1; @@ -87,7 +107,7 @@ raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenConte // add to range range->list_subranges.push_back(livenessSubrange); // add to segment - PPCRecRARange_addLink_perVirtualGPR(&(imlSegment->raInfo.linkedList_perVirtualGPR[range->virtualRegister]), livenessSubrange); + PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, livenessSubrange); PPCRecRARange_addLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, livenessSubrange); return livenessSubrange; } @@ -95,7 +115,7 @@ raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenConte void _unlinkSubrange(raLivenessSubrange_t* subrange) { IMLSegment* imlSegment = subrange->imlSegment; - PPCRecRARange_removeLink_perVirtualGPR(&imlSegment->raInfo.linkedList_perVirtualGPR[subrange->range->virtualRegister], subrange); + PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, subrange); PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 4e90d5299..a530c85f3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,8 +1,6 @@ #pragma once #include "IMLInstruction.h" -#define IML_RA_VIRT_REG_COUNT_MAX (40 + 32) // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic - struct IMLSegmentPoint { sint32 index; @@ -62,12 +60,12 @@ struct raLivenessRange_t struct PPCSegmentRegisterAllocatorInfo_t { - // analyzer stage - bool isPartOfProcessedLoop{}; // used during loop detection + // used during loop detection + bool isPartOfProcessedLoop{}; sint32 lastIterationIndex{}; // linked lists raLivenessSubrange_t* linkedList_allSubranges{}; - raLivenessSubrange_t* linkedList_perVirtualGPR[IML_RA_VIRT_REG_COUNT_MAX]{}; + std::unordered_map linkedList_perVirtualGPR2; }; struct IMLSegment @@ -92,9 +90,6 @@ struct IMLSegment // enterable segments bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true - // jump destination segments - //bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps - //uint32 jumpDestinationPPCAddress{}; // PPC FPR use mask bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR // CR use mask @@ -103,8 +98,6 @@ struct IMLSegment uint32 crBitsWritten{}; // bits that are written in this segment // register allocator info PPCSegmentRegisterAllocatorInfo_t raInfo{}; - //PPCRecVGPRDistances_t raDistances{}; - bool raRangeExtendProcessed{}; // segment state API void SetEnterable(uint32 enterAddress); From b4f2f91d8726e56bed71faa2f97a1d6a7bef2254 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 6 Feb 2023 18:03:18 +0100 Subject: [PATCH 39/64] PPCRec: FPRs now 
use the shared register allocator --- .../Recompiler/BackendX64/BackendX64.cpp | 260 +++++-- .../Recompiler/BackendX64/BackendX64FPU.cpp | 6 +- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 4 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 78 +- .../Recompiler/IML/IMLInstruction.cpp | 76 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 51 +- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 718 +++++++++--------- .../Recompiler/IML/IMLRegisterAllocator.cpp | 2 +- .../Recompiler/IML/IMLRegisterAllocator.h | 3 +- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 2 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 143 ++-- .../HW/Espresso/Recompiler/PPCRecompiler.h | 7 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 1 - .../Recompiler/PPCRecompilerImlGen.cpp | 121 +-- src/util/helpers/StringBuf.h | 10 +- 15 files changed, 818 insertions(+), 664 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 7ba3d519f..6aeac2c1b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -11,13 +11,26 @@ static x86Assembler64::GPR32 _reg32(IMLReg physReg) { cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); - return (x86Assembler64::GPR32)physReg.GetRegID(); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId < 16); + return (x86Assembler64::GPR32)regId; } static uint32 _reg64(IMLReg physReg) { cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64); - return physReg.GetRegID(); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId < 16); + return regId; +} + +uint32 _regF64(IMLReg physReg) +{ + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::F64); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId >= IMLArchX86::PHYSREG_FPR_BASE && regId < IMLArchX86::PHYSREG_FPR_BASE+16); + regId -= IMLArchX86::PHYSREG_FPR_BASE; + return regId; } static x86Assembler64::GPR8_REX _reg8(IMLReg physReg) @@ -1233,111 +1246,192 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; - auto regR = _reg64(imlInstruction->op_r_name.regR); - - if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) - { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); - } - else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) { - sint32 sprIndex = (name - PPCREC_NAME_SPR0); - if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); - else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); - else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); - else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + auto regR = _reg64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)); + } + else if (name >= 
PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + sint32 sprIndex = (name - PPCREC_NAME_SPR0); + if (sprIndex == SPR_LR) + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + else if (sprIndex == SPR_CTR) + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + else if (sprIndex == SPR_XER) + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + { + sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset); + } + else + assert_dbg(); + } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + } + else if (name == PPCREC_NAME_XER_SO) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) { - sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); } else assert_dbg(); } - else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) - { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); - } - else if (name == PPCREC_NAME_XER_CA) - { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if (name == PPCREC_NAME_XER_SO) - { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); - } - else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) - { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); - } - else if (name == PPCREC_NAME_CPU_MEMRES_EA) - { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); - } - else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); + auto regR = _regF64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) + { + x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + 
x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); + } + else + { + cemu_assert_debug(false); + } } else - assert_dbg(); + DEBUG_BREAK; + } void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; - auto regR = _reg64(imlInstruction->op_r_name.regR); - - if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) - { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), regR); - } - else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) + + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) { - uint32 sprIndex = (name - PPCREC_NAME_SPR0); - if (sprIndex == SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR); - else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR); - else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR); - else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + auto regR = _reg64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR); + } + else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + uint32 sprIndex = (name - PPCREC_NAME_SPR0); + if (sprIndex == SPR_LR) + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR); + else if (sprIndex == SPR_CTR) + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR); + else if (sprIndex == SPR_XER) + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR); + else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + { + sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR); + } + else + assert_dbg(); + } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name == PPCREC_NAME_XER_SO) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, 
offsetof(PPCInterpreter_t, reservedMemAddr), regR); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR); } else - assert_dbg(); - } - else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) - { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR); - } - else if (name == PPCREC_NAME_XER_CA) - { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR)); - } - else if (name == PPCREC_NAME_XER_SO) - { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR)); - } - else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) - { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR)); - } - else if (name == PPCREC_NAME_CPU_MEMRES_EA) - { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR); + assert_dbg(); } - else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR); + auto regR = _regF64(imlInstruction->op_r_name.regR); + uint32 name = imlInstruction->op_r_name.name; + if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) + { + x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); + } + else + { + cemu_assert_debug(false); + } } else - assert_dbg(); + DEBUG_BREAK; + + } +//void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +//{ +// uint32 name = imlInstruction->op_r_name.name; +// uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); +// if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) +// { +// x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); +// } +// else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) +// { +// x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); +// } +// else +// { +// cemu_assert_debug(false); +// } +//} +// +//void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +//{ +// uint32 name = imlInstruction->op_r_name.name; +// uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); +// if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) +// { +// x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); +// } +// else if (name 
>= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) +// { +// x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); +// } +// else +// { +// cemu_assert_debug(false); +// } +//} + uint8* codeMemoryBlock = nullptr; sint32 codeMemoryBlockIndex = 0; sint32 codeMemoryBlockSize = 0; diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 0942842de..8db27e41e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -5,11 +5,7 @@ #include "asm/x64util.h" // for recompiler_fres / frsqrte -uint32 _regF64(IMLReg r) -{ - cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::F64); - return (uint32)r.GetRegID(); -} +uint32 _regF64(IMLReg physReg); uint32 _regI32(IMLReg r) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index f1820f570..0f1a0803b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -15,8 +15,8 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction); // optimizer passes // todo - rename -bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); -bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); +//bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); +//bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 9511a5a7c..a6b4925c8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -41,22 +41,36 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return _tempOpcodename; } -void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false) +std::string IMLDebug_GetRegName(IMLReg r) { - uint32 regId = virtualRegister.GetRegID(); - DEBUG_BREAK; // todo (print type) - if (isLast) + std::string regName; + uint32 regId = r.GetRegID(); + switch (r.GetRegFormat()) { - if (regId < 10) - strOutput.addFmt("t{} ", regId); - else - strOutput.addFmt("t{}", regId); - return; + case IMLRegFormat::F32: + regName.append("f"); + break; + case IMLRegFormat::F64: + regName.append("fd"); + break; + case IMLRegFormat::I32: + regName.append("i"); + break; + case IMLRegFormat::I64: + regName.append("r"); + break; + default: + __debugbreak(); } - if (regId < 10) - strOutput.addFmt("t{} , ", regId); - else - strOutput.addFmt("t{}, ", regId); + regName.append(fmt::format("{}", regId)); + return regName; +} + +void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false) +{ + strOutput.add(IMLDebug_GetRegName(virtualRegister)); + if (!isLast) + strOutput.add(", "); } void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) @@ -149,12 +163,6 @@ std::string IMLDebug_GetConditionName(IMLCondition cond) return 
"ukn"; } -std::string IMLDebug_GetRegName(IMLReg r) -{ - cemu_assert_unimplemented(); - return ""; -} - void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); @@ -197,19 +205,24 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) { if (inst.type == PPCREC_IML_TYPE_R_NAME) - strOutput.add("LD_NAME"); + strOutput.add("R_NAME"); else - strOutput.add("ST_NAME"); + strOutput.add("NAME_R"); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); + if(inst.type == PPCREC_IML_TYPE_R_NAME) + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); - strOutput.addFmt("name_{} (", inst.op_r_name.regR.GetRegID()); + strOutput.add("name_"); if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) { strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); } + else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) + { + strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); + } else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) { strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); @@ -227,8 +240,15 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) strOutput.add("cpuReservation.value"); else - strOutput.add("ukn"); - strOutput.add(")"); + { + strOutput.addFmt("name_ukn{}", inst.op_r_name.name); + } + if (inst.type != PPCREC_IML_TYPE_R_NAME) + { + strOutput.add(", "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true); + } + } else if (inst.type == PPCREC_IML_TYPE_R_R) { @@ -281,7 +301,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - strOutput.add("CJUMP2 "); + strOutput.add("CJUMP "); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); @@ -342,9 +362,9 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.registerMem2.GetRegID()); + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2)); else - strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32); } else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { @@ -366,7 +386,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool { if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) { - strOutput.addFmt("MACRO B_TO_REG t{}", inst.op_macro.param); + strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg)); } else if (inst.operation == PPCREC_IML_MACRO_BL) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 61939a244..d50ed1052 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -638,78 +638,64 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { - + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_NAME_R) { - + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerMem2.IsValid()) - { - op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerMem2.IsValid()) - { - op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem2 = 
replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_R) { + op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, translationTable); + op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { + op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, translationTable); + op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, translationTable); + op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - } - else if (type == PPCREC_IML_TYPE_FPR_R) - { + op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, translationTable); + op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, translationTable); + op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, translationTable); + op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_COMPARE) { + op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, translationTable); + op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable); op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable); } else diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index a3f0f652d..8b49cd22e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -388,6 +388,7 @@ struct IMLUsedRegisters template void ForEachAccessedGPR(Fn F) const { + // GPRs if (readGPR1.IsValid()) F(readGPR1, false); if (readGPR2.IsValid()) @@ -398,22 +399,33 @@ struct IMLUsedRegisters F(writtenGPR1, true); if (writtenGPR2.IsValid()) F(writtenGPR2, true); - } - - bool HasSameBaseFPRRegId(IMLRegID regId) const - { - if (readFPR1.IsValid() && readFPR1.GetRegID() == regId) - return true; - if (readFPR2.IsValid() && readFPR2.GetRegID() == regId) - return true; - if (readFPR3.IsValid() && readFPR3.GetRegID() == regId) - return true; - if (readFPR4.IsValid() && readFPR4.GetRegID() == regId) - return true; - if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId) - return true; - return false; - } + // FPRs + if (readFPR1.IsValid()) + F(readFPR1, false); + if (readFPR2.IsValid()) + F(readFPR2, false); + if (readFPR3.IsValid()) + F(readFPR3, false); + if (readFPR4.IsValid()) + F(readFPR4, false); + if (writtenFPR1.IsValid()) + F(writtenFPR1, true); + } + + //bool HasSameBaseFPRRegId(IMLRegID regId) const + //{ + // if (readFPR1.IsValid() && readFPR1.GetRegID() == regId) + // return true; + // if (readFPR2.IsValid() && readFPR2.GetRegID() == regId) + // return true; + // if (readFPR3.IsValid() && readFPR3.GetRegID() == regId) + // return true; + // if (readFPR4.IsValid() && readFPR4.GetRegID() == regId) + // return true; + // if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId) + // return true; + // return false; + //} }; struct IMLInstruction @@ -765,4 +777,11 @@ struct IMLInstruction void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]); void ReplaceFPR(IMLRegID fprRegisterSearched, 
IMLRegID fprRegisterReplaced); +}; + +// architecture specific constants +namespace IMLArchX86 +{ + static constexpr int PHYSREG_GPR_BASE = 0; + static constexpr int PHYSREG_FPR_BASE = 16; }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 568c0b791..b9449c949 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,369 +6,369 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" -bool _RegExceedsFPRSpace(IMLReg r) -{ - if (r.IsInvalid()) - return false; - if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS) - return true; - return false; -} +//bool _RegExceedsFPRSpace(IMLReg r) +//{ +// if (r.IsInvalid()) +// return false; +// if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS) +// return true; +// return false; +//} IMLReg _FPRRegFromID(IMLRegID regId) { return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); } -bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - // only xmm0 to xmm14 may be used, xmm15 is reserved - // this method will reduce the number of fpr registers used - // inefficient algorithm for optimizing away excess registers - // we simply load, use and store excess registers into other unused registers when we need to - // first we remove all name load and store instructions that involve out-of-bounds registers - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - size_t imlIndex = 0; - while( imlIndex < segIt->imlList.size() ) - { - IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; - if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR)) - { - imlInstructionItr.make_no_op(); - } - } - imlIndex++; - } - } - // replace registers - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - size_t imlIndex = 0; - while( imlIndex < segIt->imlList.size() ) - { - IMLUsedRegisters registersUsed; - while( true ) - { - segIt->imlList[imlIndex].CheckRegisterUsage(®istersUsed); - if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) ) - { - // get index of register to replace - sint32 fprToReplace = -1; - if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) - fprToReplace = registersUsed.readFPR1.GetRegID(); - else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) - fprToReplace = registersUsed.readFPR2.GetRegID(); - else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) - fprToReplace = registersUsed.readFPR3.GetRegID(); - else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) - fprToReplace = registersUsed.readFPR4.GetRegID(); - else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) - fprToReplace = registersUsed.writtenFPR1.GetRegID(); - if (fprToReplace >= 0) - { - // generate mask of useable registers - uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 - if (registersUsed.readFPR1.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID())); - if (registersUsed.readFPR2.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID())); - if (registersUsed.readFPR3.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID())); - if 
(registersUsed.readFPR4.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID())); - if (registersUsed.writtenFPR1.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID())); - // get highest unused register index (0-6 range) - sint32 unusedRegisterIndex = -1; - for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++) - { - if (useableRegisterMask & (1 << f)) - { - unusedRegisterIndex = f; - } - } - if (unusedRegisterIndex == -1) - assert_dbg(); - // determine if the placeholder register is actually used (if not we must not load/store it) - uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - bool replacedRegisterIsUsed = true; - if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32)) - { - replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0]; - } - // replace registers that are out of range - segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); - // add load/store name after instruction - PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2); - // add load/store before current instruction - PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); - // name_unusedRegister = unusedRegister - IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if (replacedRegisterIsUsed) - { - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - } - else - imlInstructionItr->make_no_op(); - imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - // name_gprToReplace = unusedRegister - imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - // unusedRegister = name_unusedRegister - imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if (replacedRegisterIsUsed) - { - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - } - else - imlInstructionItr->make_no_op(); - } - } - else - break; - } - imlIndex++; - } - } - return true; -} - -typedef struct -{ - bool isActive; - uint32 virtualReg; - sint32 lastUseIndex; -}ppcRecRegisterMapping_t; - -typedef struct -{ - ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS]; - sint32 ppcRegToMapping[64]; - sint32 currentUseIndex; -}ppcRecManageRegisters_t; - -ppcRecRegisterMapping_t* 
PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters) -{ - // find free register - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx->currentMapping[i].isActive == false) - { - rCtx->currentMapping[i].isActive = true; - rCtx->currentMapping[i].virtualReg = -1; - rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex; - return rCtx->currentMapping + i; - } - } - // all registers are used - return nullptr; -} - -ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask) -{ - // find unloadable register (with lowest lastUseIndex) - sint32 unloadIndex = -1; - sint32 unloadIndexLastUse = 0x7FFFFFFF; - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx->currentMapping[i].isActive == false) - continue; - if( (unloadLockedMask&(1<currentMapping[i].virtualReg; - bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg); - if (isReserved) - continue; - if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) - { - unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex; - unloadIndex = i; - } - } - cemu_assert(unloadIndex != -1); - return rCtx->currentMapping + unloadIndex; -} - -bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex) -{ - ppcRecManageRegisters_t rCtx = { 0 }; - for (sint32 i = 0; i < 64; i++) - rCtx.ppcRegToMapping[i] = -1; - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; - size_t idx = 0; - sint32 currentUseIndex = 0; - IMLUsedRegisters registersUsed; - while (idx < imlSegment->imlList.size()) - { - IMLInstruction& idxInst = imlSegment->imlList[idx]; - if (idxInst.IsSuffixInstruction()) - break; - idxInst.CheckRegisterUsage(®istersUsed); - IMLReg fprMatch[4]; - IMLReg fprReplace[4]; - fprMatch[0] = IMLREG_INVALID; - fprMatch[1] = IMLREG_INVALID; - fprMatch[2] = IMLREG_INVALID; - fprMatch[3] = IMLREG_INVALID; - fprReplace[0] = IMLREG_INVALID; - fprReplace[1] = IMLREG_INVALID; - fprReplace[2] = IMLREG_INVALID; - fprReplace[3] = IMLREG_INVALID; - // generate a mask of registers that we may not free - sint32 numReplacedOperands = 0; - uint32 unloadLockedMask = 0; - for (sint32 f = 0; f < 5; f++) - { - IMLReg virtualFpr; - if (f == 0) - virtualFpr = registersUsed.readFPR1; - else if (f == 1) - virtualFpr = registersUsed.readFPR2; - else if (f == 2) - virtualFpr = registersUsed.readFPR3; - else if (f == 3) - virtualFpr = registersUsed.readFPR4; - else if (f == 4) - virtualFpr = registersUsed.writtenFPR1; - if(virtualFpr.IsInvalid()) - continue; - cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64); - cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64); - cemu_assert_debug(virtualFpr.GetRegID() < 64); - // check if this virtual FPR is already loaded in any real register - ppcRecRegisterMapping_t* regMapping; - if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1) - { - // not loaded - // find available register - while (true) - { - regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, ®istersUsed); - if (regMapping == NULL) - { - // unload least recently used register and try again - ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, ®istersUsed, unloadLockedMask); - // mark as locked - unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); - // create unload instruction - 
PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping)); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; - idx++; - // update mapping - unloadRegMapping->isActive = false; - rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1; - } - else - break; - } - // create load instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping)); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()]; - idx++; - // update mapping - regMapping->virtualReg = virtualFpr.GetRegID(); - rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping); - regMapping->lastUseIndex = rCtx.currentUseIndex; - rCtx.currentUseIndex++; - } - else - { - regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()]; - regMapping->lastUseIndex = rCtx.currentUseIndex; - rCtx.currentUseIndex++; - } - // replace FPR - bool entryFound = false; - for (sint32 t = 0; t < numReplacedOperands; t++) - { - if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID()) - { - cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping)); - entryFound = true; - break; - } - } - if (entryFound == false) - { - cemu_assert_debug(numReplacedOperands != 4); - fprMatch[numReplacedOperands] = virtualFpr; - fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping); - numReplacedOperands++; - } - } - if (numReplacedOperands > 0) - { - imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace); - } - // next - idx++; - } - // count loaded registers - sint32 numLoadedRegisters = 0; - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx.currentMapping[i].isActive) - numLoadedRegisters++; - } - // store all loaded registers - if (numLoadedRegisters > 0) - { - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters); - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx.currentMapping[i].isActive == false) - continue; - IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; - idx++; - } - } - return true; -} - -bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) - { - if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) - return false; - } - return true; -} +//bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* 
ppcImlGenContext) +//{ +// // only xmm0 to xmm14 may be used, xmm15 is reserved +// // this method will reduce the number of fpr registers used +// // inefficient algorithm for optimizing away excess registers +// // we simply load, use and store excess registers into other unused registers when we need to +// // first we remove all name load and store instructions that involve out-of-bounds registers +// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) +// { +// size_t imlIndex = 0; +// while( imlIndex < segIt->imlList.size() ) +// { +// IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; +// if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) +// { +// if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR)) +// { +// imlInstructionItr.make_no_op(); +// } +// } +// imlIndex++; +// } +// } +// // replace registers +// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) +// { +// size_t imlIndex = 0; +// while( imlIndex < segIt->imlList.size() ) +// { +// IMLUsedRegisters registersUsed; +// while( true ) +// { +// segIt->imlList[imlIndex].CheckRegisterUsage(®istersUsed); +// if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) ) +// { +// // get index of register to replace +// sint32 fprToReplace = -1; +// if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) +// fprToReplace = registersUsed.readFPR1.GetRegID(); +// else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) +// fprToReplace = registersUsed.readFPR2.GetRegID(); +// else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) +// fprToReplace = registersUsed.readFPR3.GetRegID(); +// else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) +// fprToReplace = registersUsed.readFPR4.GetRegID(); +// else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) +// fprToReplace = registersUsed.writtenFPR1.GetRegID(); +// if (fprToReplace >= 0) +// { +// // generate mask of useable registers +// uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 +// if (registersUsed.readFPR1.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID())); +// if (registersUsed.readFPR2.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID())); +// if (registersUsed.readFPR3.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID())); +// if (registersUsed.readFPR4.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID())); +// if (registersUsed.writtenFPR1.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID())); +// // get highest unused register index (0-6 range) +// sint32 unusedRegisterIndex = -1; +// for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++) +// { +// if (useableRegisterMask & (1 << f)) +// { +// unusedRegisterIndex = f; +// } +// } +// if (unusedRegisterIndex == -1) +// assert_dbg(); +// // determine if the placeholder register is actually used (if not we must not load/store it) +// uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; +// bool replacedRegisterIsUsed = true; +// if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32)) +// { +// replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0]; +// } +// // replace registers that are out of range +// 
segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); +// // add load/store name after instruction +// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2); +// // add load/store before current instruction +// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); +// // name_unusedRegister = unusedRegister +// IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// if (replacedRegisterIsUsed) +// { +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; +// } +// else +// imlInstructionItr->make_no_op(); +// imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; +// // name_gprToReplace = unusedRegister +// imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; +// // unusedRegister = name_unusedRegister +// imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// if (replacedRegisterIsUsed) +// { +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; +// } +// else +// imlInstructionItr->make_no_op(); +// } +// } +// else +// break; +// } +// imlIndex++; +// } +// } +// return true; +//} +// +//typedef struct +//{ +// bool isActive; +// uint32 virtualReg; +// sint32 lastUseIndex; +//}ppcRecRegisterMapping_t; +// +//typedef struct +//{ +// ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS]; +// sint32 ppcRegToMapping[64]; +// sint32 currentUseIndex; +//}ppcRecManageRegisters_t; +// +//ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters) +//{ +// // find free register +// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx->currentMapping[i].isActive == false) +// { +// rCtx->currentMapping[i].isActive = true; +// rCtx->currentMapping[i].virtualReg = -1; +// rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex; +// return rCtx->currentMapping + i; +// } +// } +// // all registers are used +// return nullptr; +//} +// +//ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask) +//{ +// // find unloadable register (with lowest lastUseIndex) +// sint32 unloadIndex = -1; +// sint32 unloadIndexLastUse = 0x7FFFFFFF; +// for (sint32 i = 
0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx->currentMapping[i].isActive == false) +// continue; +// if( (unloadLockedMask&(1<currentMapping[i].virtualReg; +// bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg); +// if (isReserved) +// continue; +// if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) +// { +// unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex; +// unloadIndex = i; +// } +// } +// cemu_assert(unloadIndex != -1); +// return rCtx->currentMapping + unloadIndex; +//} +// +//bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex) +//{ +// ppcRecManageRegisters_t rCtx = { 0 }; +// for (sint32 i = 0; i < 64; i++) +// rCtx.ppcRegToMapping[i] = -1; +// IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; +// size_t idx = 0; +// sint32 currentUseIndex = 0; +// IMLUsedRegisters registersUsed; +// while (idx < imlSegment->imlList.size()) +// { +// IMLInstruction& idxInst = imlSegment->imlList[idx]; +// if (idxInst.IsSuffixInstruction()) +// break; +// idxInst.CheckRegisterUsage(®istersUsed); +// IMLReg fprMatch[4]; +// IMLReg fprReplace[4]; +// fprMatch[0] = IMLREG_INVALID; +// fprMatch[1] = IMLREG_INVALID; +// fprMatch[2] = IMLREG_INVALID; +// fprMatch[3] = IMLREG_INVALID; +// fprReplace[0] = IMLREG_INVALID; +// fprReplace[1] = IMLREG_INVALID; +// fprReplace[2] = IMLREG_INVALID; +// fprReplace[3] = IMLREG_INVALID; +// // generate a mask of registers that we may not free +// sint32 numReplacedOperands = 0; +// uint32 unloadLockedMask = 0; +// for (sint32 f = 0; f < 5; f++) +// { +// IMLReg virtualFpr; +// if (f == 0) +// virtualFpr = registersUsed.readFPR1; +// else if (f == 1) +// virtualFpr = registersUsed.readFPR2; +// else if (f == 2) +// virtualFpr = registersUsed.readFPR3; +// else if (f == 3) +// virtualFpr = registersUsed.readFPR4; +// else if (f == 4) +// virtualFpr = registersUsed.writtenFPR1; +// if(virtualFpr.IsInvalid()) +// continue; +// cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64); +// cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64); +// cemu_assert_debug(virtualFpr.GetRegID() < 64); +// // check if this virtual FPR is already loaded in any real register +// ppcRecRegisterMapping_t* regMapping; +// if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1) +// { +// // not loaded +// // find available register +// while (true) +// { +// regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, ®istersUsed); +// if (regMapping == NULL) +// { +// // unload least recently used register and try again +// ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, ®istersUsed, unloadLockedMask); +// // mark as locked +// unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); +// // create unload instruction +// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); +// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; +// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); +// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping)); +// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; +// idx++; +// // update mapping +// unloadRegMapping->isActive = false; +// rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1; +// } +// else 
+// break; +// } +// // create load instruction +// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); +// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; +// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); +// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; +// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping)); +// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()]; +// idx++; +// // update mapping +// regMapping->virtualReg = virtualFpr.GetRegID(); +// rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping); +// regMapping->lastUseIndex = rCtx.currentUseIndex; +// rCtx.currentUseIndex++; +// } +// else +// { +// regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()]; +// regMapping->lastUseIndex = rCtx.currentUseIndex; +// rCtx.currentUseIndex++; +// } +// // replace FPR +// bool entryFound = false; +// for (sint32 t = 0; t < numReplacedOperands; t++) +// { +// if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID()) +// { +// cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping)); +// entryFound = true; +// break; +// } +// } +// if (entryFound == false) +// { +// cemu_assert_debug(numReplacedOperands != 4); +// fprMatch[numReplacedOperands] = virtualFpr; +// fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping); +// numReplacedOperands++; +// } +// } +// if (numReplacedOperands > 0) +// { +// imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace); +// } +// // next +// idx++; +// } +// // count loaded registers +// sint32 numLoadedRegisters = 0; +// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx.currentMapping[i].isActive) +// numLoadedRegisters++; +// } +// // store all loaded registers +// if (numLoadedRegisters > 0) +// { +// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters); +// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx.currentMapping[i].isActive == false) +// continue; +// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; +// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); +// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i); +// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; +// idx++; +// } +// } +// return true; +//} +// +//bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) +//{ +// for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) +// { +// if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) +// return false; +// } +// return true; +//} /* @@ -663,11 +663,13 @@ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenCont } } +IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg); + sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg) { if (gqrReg.IsInvalid()) return -1; - sint32 namedReg = ppcImlGenContext->mappedRegister[gqrReg.GetRegID()]; + sint32 namedReg = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg); if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + 
SPR_UGQR7)) { return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 4b6100f72..2fb55c801 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1100,7 +1100,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML if(it.second.isProcessed) continue; IMLRegID regId = it.first; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.deprGenContext->mappedRegister[regId]); + raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.raParam->regIdToName.find(regId)->second); PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range); } // fill created ranges with read/write location indices diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index 5e0d0f044..52b203970 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -93,7 +93,8 @@ struct IMLRegisterAllocatorParameters return perTypePhysPool[stdx::to_underlying(regFormat)]; } - IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];// physicalRegisterPool; + IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)]; + std::unordered_map regIdToName; }; void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index a530c85f3..70151422e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -91,7 +91,7 @@ struct IMLSegment bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true // PPC FPR use mask - bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR + //bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR // CR use mask uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten) uint32 crBitsRead{}; // all bits that are read in this segment diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index b040275e3..2d1535fcb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -206,8 +206,19 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // return nullptr; //} - // Large functions for testing (botw): - // 3B4049C + //if (ppcRecFunc->ppcAddress == 0x03C26844) + //{ + // __debugbreak(); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + // 31A8778 + + // Functions for testing (botw): + // 3B4049C (large with switch case) + // 30BF118 (has a bndz copy loop + some float instructions at the end) + + // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); @@ -217,8 +228,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP } // collect list of PPC-->x64 entry points - cemuLog_log(LogType::Force, "[Recompiler] 
Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size()); - entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) { @@ -230,6 +239,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } + + cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); + return ppcRecFunc; } @@ -242,72 +254,85 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) // if GQRs can be predicted, optimize PSQ load/stores PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - // count number of used registers - uint32 numLoadedFPRRegisters = 0; - for (uint32 i = 0; i < 255; i++) - { - if (ppcImlGenContext.mappedFPRRegister[i]) - numLoadedFPRRegisters++; - } - // insert name store instructions at the end of each segment but before branch instructions - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if (segIt->imlList.size() == 0) - continue; // ignore empty segments - // analyze segment for register usage - IMLUsedRegisters registersUsed; - for (sint32 i = 0; i < segIt->imlList.size(); i++) - { - segIt->imlList[i].CheckRegisterUsage(®istersUsed); - IMLReg accessedTempReg[5]; - // intermediate FPRs - accessedTempReg[0] = registersUsed.readFPR1; - accessedTempReg[1] = registersUsed.readFPR2; - accessedTempReg[2] = registersUsed.readFPR3; - accessedTempReg[3] = registersUsed.readFPR4; - accessedTempReg[4] = registersUsed.writtenFPR1; - for (sint32 f = 0; f < 5; f++) - { - if (accessedTempReg[f].IsInvalid()) - continue; - uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()]; - if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) - { - segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; - } - } - } - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if (segIt->imlList.size() == 0) + // continue; // ignore empty segments + // // analyze segment for register usage + // IMLUsedRegisters registersUsed; + // for (sint32 i = 0; i < segIt->imlList.size(); i++) + // { + // segIt->imlList[i].CheckRegisterUsage(®istersUsed); + // IMLReg accessedTempReg[5]; + // // intermediate FPRs + // accessedTempReg[0] = registersUsed.readFPR1; + // accessedTempReg[1] = registersUsed.readFPR2; + // accessedTempReg[2] = registersUsed.readFPR3; + // accessedTempReg[3] = registersUsed.readFPR4; + // accessedTempReg[4] = registersUsed.writtenFPR1; + // for (sint32 f = 0; f < 5; f++) + // { + // if (accessedTempReg[f].IsInvalid()) + // continue; + // uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()]; + // if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) + // { + // segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; + // } + // } + // } + //} // merge certain float load+store patterns (must happen before FPR register remapping) PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext); // delay byte swapping for certain load+store patterns PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext); - if (numLoadedFPRRegisters > 0) - { - if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) - { - return false; - } - } + //if (numLoadedFPRRegisters > 0) + //{ + // if 
(PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) + // { + // return false; + // } + //} IMLRegisterAllocatorParameters raParam; + for (auto& it : ppcImlGenContext.mappedRegs) + raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first); + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); - gprPhysPool.SetAvailable(X86_REG_RAX); - gprPhysPool.SetAvailable(X86_REG_RDX); - gprPhysPool.SetAvailable(X86_REG_RBX); - gprPhysPool.SetAvailable(X86_REG_RBP); - gprPhysPool.SetAvailable(X86_REG_RSI); - gprPhysPool.SetAvailable(X86_REG_RDI); - gprPhysPool.SetAvailable(X86_REG_R8); - gprPhysPool.SetAvailable(X86_REG_R9); - gprPhysPool.SetAvailable(X86_REG_R10); - gprPhysPool.SetAvailable(X86_REG_R11); - gprPhysPool.SetAvailable(X86_REG_R12); - gprPhysPool.SetAvailable(X86_REG_RCX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBP); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RSI); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDI); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R12); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX); + + // add XMM registers, except XMM15 which is the temporary register + auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 0); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 1); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 2); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 3); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 4); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 5); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 6); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 7); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 8); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 9); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 10); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 11); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14); IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 7f9817aac..080ce2fa0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -42,9 +42,12 @@ struct ppcImlGenContext_t // cycle counter uint32 cyclesSinceLastBranch; // used to track ppc cycles // temporary general purpose registers - uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; + //uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; // temporary floating point registers (single and double precision) - uint32 mappedFPRRegister[256]; + //uint32 mappedFPRRegister[256]; + + std::unordered_map mappedRegs; + // list of segments std::vector segmentList2; // code 
generation control diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 042cf0675..5d30267d5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -102,4 +102,3 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); - diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index f89edfe35..f474b0156 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -134,74 +134,73 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P basicBlockInfo.appendSegment = segMerge; } -uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) -{ - if( mappedName == PPCREC_NAME_NONE ) +IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat) +{ + auto it = ppcImlGenContext->mappedRegs.find(mappedName); + if (it != ppcImlGenContext->mappedRegs.end()) + return it->second; + // create new reg entry + IMLRegFormat baseFormat; + if (regFormat == IMLRegFormat::F64) + baseFormat = IMLRegFormat::F64; + else if (regFormat == IMLRegFormat::I32) + baseFormat = IMLRegFormat::I64; + else { - debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(): Invalid mappedName parameter\n"); - return PPC_REC_INVALID_REGISTER; + cemu_assert_suspicious(); } - for(uint32 i=0; i<(PPC_REC_MAX_VIRTUAL_GPR-1); i++) - { - if( ppcImlGenContext->mappedRegister[i] == PPCREC_NAME_NONE ) - { - ppcImlGenContext->mappedRegister[i] = mappedName; - return i; - } - } - return 0; + IMLRegID newRegId = ppcImlGenContext->mappedRegs.size(); + IMLReg newReg(baseFormat, regFormat, 0, newRegId); + ppcImlGenContext->mappedRegs.try_emplace(mappedName, newReg); + return newReg; } -uint32 PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg) { - for(uint32 i=0; i< PPC_REC_MAX_VIRTUAL_GPR; i++) + for (auto& it : ppcImlGenContext->mappedRegs) { - if( ppcImlGenContext->mappedRegister[i] == mappedName ) - { - return i; - } + if (it.second.GetRegID() == reg.GetRegID()) + return it.first; } - return PPC_REC_INVALID_REGISTER; + cemu_assert(false); + return 0; } uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - if( mappedName == PPCREC_NAME_NONE ) - { - debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n"); - return PPC_REC_INVALID_REGISTER; - } - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE ) - { - ppcImlGenContext->mappedFPRRegister[i] = mappedName; - return i; - } - } + __debugbreak(); + //if( mappedName == PPCREC_NAME_NONE ) + //{ + // debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n"); + // return PPC_REC_INVALID_REGISTER; + //} + //for(uint32 i=0; i<255; i++) + //{ + // if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE ) + // { + // ppcImlGenContext->mappedFPRRegister[i] = mappedName; + // return i; + // } + //} return 0; } uint32 
PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) - { - return i; - } - } + __debugbreak(); + //for(uint32 i=0; i<255; i++) + //{ + // if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) + // { + // return i; + // } + //} return PPC_REC_INVALID_REGISTER; } IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); - if (loadedRegisterIndex != PPC_REC_INVALID_REGISTER) - return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, loadedRegisterIndex); - - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName); - return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, registerIndex); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::I32); } IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) @@ -243,14 +242,15 @@ IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) */ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) { - if( loadNew == false ) - { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - } - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + //if( loadNew == false ) + //{ + // uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); + // if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) + // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); + //} + //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); + //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } /* @@ -259,11 +259,12 @@ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, */ IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + //uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); + //if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) + // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); + //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); + //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } bool PPCRecompiler_canInlineFunction(MPTR functionPtr, 
sint32* functionInstructionCount) diff --git a/src/util/helpers/StringBuf.h b/src/util/helpers/StringBuf.h index 432fa7a1d..8b34e54f7 100644 --- a/src/util/helpers/StringBuf.h +++ b/src/util/helpers/StringBuf.h @@ -44,7 +44,8 @@ class StringBuf void add(std::string_view appendedStr) { - size_t remainingLen = this->limit - this->length; + if (this->length + appendedStr.size() + 1 >= this->limit) + _reserve(std::max(this->length + appendedStr.size() + 64, this->limit + this->limit / 2)); size_t copyLen = appendedStr.size(); if (remainingLen < copyLen) copyLen = remainingLen; @@ -80,6 +81,13 @@ class StringBuf } private: + void _reserve(uint32 newLimit) + { + cemu_assert_debug(newLimit > length); + this->str = (uint8*)realloc(this->str, newLimit + 4); + this->limit = newLimit; + } + uint8* str; uint32 length; /* in bytes */ uint32 limit; /* in bytes */ From e5717fb1a84776e3a7d7ad093b61272be5ea9875 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 13 Mar 2023 05:10:34 +0100 Subject: [PATCH 40/64] PPCRec: Implement MFCR and MTCRF --- .../Recompiler/BackendX64/BackendX64.cpp | 33 - .../Recompiler/BackendX64/BackendX64FPU.cpp | 36 - src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 17 +- .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 38 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 32 +- .../Recompiler/IML/IMLInstruction.cpp | 41 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 18 - .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 615 +----------------- .../IML/IMLRegisterAllocatorRanges.cpp | 1 + .../HW/Espresso/Recompiler/IML/IMLSegment.h | 15 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 73 +-- .../Recompiler/PPCRecompilerImlGen.cpp | 62 +- 12 files changed, 60 insertions(+), 921 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 6aeac2c1b..ffd635a03 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -680,31 +680,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0); x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) - { - DEBUG_BREAK; - //uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; - //x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - //for(sint32 f=0; f<32; f++) - //{ - // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); - // x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - //} - } - else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - DEBUG_BREAK; - //uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; - //uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - //for (sint32 f = 0; f < 32; f++) - //{ - // if(((crBitMask >> f) & 1) == 0) - // continue; - // x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); - // x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); - // x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); - //} - } else { debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); 
@@ -1582,14 +1557,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { // no op } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) { if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 8db27e41e..cff46a2d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -34,42 +34,6 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } -void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - uint32 name = imlInstruction->op_r_name.name; - uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); - if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) - { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); - } - else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) - { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); - } - else - { - cemu_assert_debug(false); - } -} - -void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - uint32 name = imlInstruction->op_r_name.name; - uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); - if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) - { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); - } - else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) - { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); - } - else - { - cemu_assert_debug(false); - } -} - void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR) { // load GQR diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 0f1a0803b..b58fdfa8d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -4,24 +4,13 @@ #include "IMLSegment.h" // analyzer -struct PPCRecCRTracking_t -{ - uint32 readCRBits; - uint32 writtenCRBits; -}; - bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment); -bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction); // optimizer passes -// todo - rename -//bool PPCRecompiler_reduceNumberOfFPRRegisters(struct 
ppcImlGenContext_t* ppcImlGenContext); -//bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext); // debug -void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); +void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index 1b348c4cf..77403e1b8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -52,40 +52,4 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) } } return false; -} - -/* -* Returns true if the instruction can overwrite CR (depending on value of ->crRegister) -*/ -bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) -{ - if (imlInstruction->type == PPCREC_IML_TYPE_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) - return true; - - // new instructions - if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) - return true; - - - return false; -} +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index a6b4925c8..d295f0aa8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -60,7 +60,7 @@ std::string IMLDebug_GetRegName(IMLReg r) regName.append("r"); break; default: - __debugbreak(); + DEBUG_BREAK; } regName.append(fmt::format("{}", regId)); return regName; @@ -417,36 +417,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt("MACRO ukn operation {}", inst.operation); } } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_NAME) - { - strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.regR.GetRegID(), inst.op_r_name.name); - if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) - { - strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); - } - else if 
(inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0 + 999)) - { - strOutput.addFmt("tempFpr{}", inst.op_r_name.name - PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.add(")"); - } - else if (inst.type == PPCREC_IML_TYPE_FPR_NAME_R) - { - strOutput.addFmt("name_{} (", inst.op_r_name.name); - if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) - { - strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0 + 999)) - { - strOutput.addFmt("tempFpr{}", inst.op_r_name.name - PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.addFmt(") = {}", IMLDebug_GetRegName(inst.op_r_name.regR)); - } else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) { strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData)); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index d50ed1052..f2476e612 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -56,12 +56,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation != PPCREC_IML_OP_OR && operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these - if (operation == PPCREC_IML_OP_MTCRF) - { - // operand register is read only - registersUsed->readGPR1 = op_r_immS32.regR; - } - else if (operation == PPCREC_IML_OP_LEFT_ROTATE) + if (operation == PPCREC_IML_OP_LEFT_ROTATE) { // operand register is read and write registersUsed->readGPR1 = op_r_immS32.regR; @@ -221,16 +216,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue; registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut; } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - // fpr operation - registersUsed->writtenFPR1 = op_r_name.regR; - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - // fpr operation - registersUsed->readFPR1 = op_r_name.regR; - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { // fpr load operation @@ -636,14 +621,6 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable); op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable); } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); @@ -766,14 +743,6 @@ void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegist { ; } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { 
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); @@ -885,14 +854,6 @@ void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegist { ; } - else if (type == PPCREC_IML_TYPE_FPR_R_NAME) - { - op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_NAME_R) - { - op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced); - } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 8b49cd22e..817fef190 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -125,8 +125,6 @@ enum PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 - PPCREC_IML_OP_MFCR, // copy cr to gpr - PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) // FPU PPCREC_IML_OP_FPR_ADD_BOTTOM, PPCREC_IML_OP_FPR_ADD_PAIR, @@ -253,8 +251,6 @@ enum PPCREC_IML_TYPE_CONDITIONAL_R_S32, // FPR - PPCREC_IML_TYPE_FPR_R_NAME, // name = f* - PPCREC_IML_TYPE_FPR_NAME_R, // f* = name PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode) @@ -412,20 +408,6 @@ struct IMLUsedRegisters F(writtenFPR1, true); } - //bool HasSameBaseFPRRegId(IMLRegID regId) const - //{ - // if (readFPR1.IsValid() && readFPR1.GetRegID() == regId) - // return true; - // if (readFPR2.IsValid() && readFPR2.GetRegID() == regId) - // return true; - // if (readFPR3.IsValid() && readFPR3.GetRegID() == regId) - // return true; - // if (readFPR4.IsValid() && readFPR4.GetRegID() == regId) - // return true; - // if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId) - // return true; - // return false; - //} }; struct IMLInstruction diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index b9449c949..cdf922ce7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,492 +6,11 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" -//bool _RegExceedsFPRSpace(IMLReg r) -//{ -// if (r.IsInvalid()) -// return false; -// if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS) -// return true; -// return false; -//} - IMLReg _FPRRegFromID(IMLRegID regId) { return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); } -//bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -//{ -// // only xmm0 to xmm14 may be used, xmm15 is reserved -// // this method will reduce the number of fpr registers used -// // inefficient algorithm for optimizing away excess registers -// // we simply load, use and store excess registers into other unused registers when we need to -// // first we remove all name load and store instructions that involve out-of-bounds registers -// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) -// { -// size_t imlIndex = 0; -// while( imlIndex < segIt->imlList.size() ) -// 
{ -// IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; -// if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) -// { -// if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR)) -// { -// imlInstructionItr.make_no_op(); -// } -// } -// imlIndex++; -// } -// } -// // replace registers -// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) -// { -// size_t imlIndex = 0; -// while( imlIndex < segIt->imlList.size() ) -// { -// IMLUsedRegisters registersUsed; -// while( true ) -// { -// segIt->imlList[imlIndex].CheckRegisterUsage(®istersUsed); -// if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) ) -// { -// // get index of register to replace -// sint32 fprToReplace = -1; -// if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) -// fprToReplace = registersUsed.readFPR1.GetRegID(); -// else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) -// fprToReplace = registersUsed.readFPR2.GetRegID(); -// else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) -// fprToReplace = registersUsed.readFPR3.GetRegID(); -// else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) -// fprToReplace = registersUsed.readFPR4.GetRegID(); -// else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) -// fprToReplace = registersUsed.writtenFPR1.GetRegID(); -// if (fprToReplace >= 0) -// { -// // generate mask of useable registers -// uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 -// if (registersUsed.readFPR1.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID())); -// if (registersUsed.readFPR2.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID())); -// if (registersUsed.readFPR3.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID())); -// if (registersUsed.readFPR4.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID())); -// if (registersUsed.writtenFPR1.IsValid()) -// useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID())); -// // get highest unused register index (0-6 range) -// sint32 unusedRegisterIndex = -1; -// for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++) -// { -// if (useableRegisterMask & (1 << f)) -// { -// unusedRegisterIndex = f; -// } -// } -// if (unusedRegisterIndex == -1) -// assert_dbg(); -// // determine if the placeholder register is actually used (if not we must not load/store it) -// uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; -// bool replacedRegisterIsUsed = true; -// if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32)) -// { -// replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0]; -// } -// // replace registers that are out of range -// segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); -// // add load/store name after instruction -// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2); -// // add load/store before current instruction -// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); -// // name_unusedRegister = unusedRegister -// IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// if (replacedRegisterIsUsed) -// { -// imlInstructionItr->type = 
PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; -// } -// else -// imlInstructionItr->make_no_op(); -// imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; -// // name_gprToReplace = unusedRegister -// imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; -// // unusedRegister = name_unusedRegister -// imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); -// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); -// if (replacedRegisterIsUsed) -// { -// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; -// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); -// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; -// } -// else -// imlInstructionItr->make_no_op(); -// } -// } -// else -// break; -// } -// imlIndex++; -// } -// } -// return true; -//} -// -//typedef struct -//{ -// bool isActive; -// uint32 virtualReg; -// sint32 lastUseIndex; -//}ppcRecRegisterMapping_t; -// -//typedef struct -//{ -// ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS]; -// sint32 ppcRegToMapping[64]; -// sint32 currentUseIndex; -//}ppcRecManageRegisters_t; -// -//ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters) -//{ -// // find free register -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx->currentMapping[i].isActive == false) -// { -// rCtx->currentMapping[i].isActive = true; -// rCtx->currentMapping[i].virtualReg = -1; -// rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex; -// return rCtx->currentMapping + i; -// } -// } -// // all registers are used -// return nullptr; -//} -// -//ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask) -//{ -// // find unloadable register (with lowest lastUseIndex) -// sint32 unloadIndex = -1; -// sint32 unloadIndexLastUse = 0x7FFFFFFF; -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx->currentMapping[i].isActive == false) -// continue; -// if( (unloadLockedMask&(1<currentMapping[i].virtualReg; -// bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg); -// if (isReserved) -// continue; -// if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) -// { -// unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex; -// unloadIndex = i; -// } -// } -// cemu_assert(unloadIndex != -1); -// return rCtx->currentMapping + unloadIndex; -//} -// 
-//bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex) -//{ -// ppcRecManageRegisters_t rCtx = { 0 }; -// for (sint32 i = 0; i < 64; i++) -// rCtx.ppcRegToMapping[i] = -1; -// IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; -// size_t idx = 0; -// sint32 currentUseIndex = 0; -// IMLUsedRegisters registersUsed; -// while (idx < imlSegment->imlList.size()) -// { -// IMLInstruction& idxInst = imlSegment->imlList[idx]; -// if (idxInst.IsSuffixInstruction()) -// break; -// idxInst.CheckRegisterUsage(®istersUsed); -// IMLReg fprMatch[4]; -// IMLReg fprReplace[4]; -// fprMatch[0] = IMLREG_INVALID; -// fprMatch[1] = IMLREG_INVALID; -// fprMatch[2] = IMLREG_INVALID; -// fprMatch[3] = IMLREG_INVALID; -// fprReplace[0] = IMLREG_INVALID; -// fprReplace[1] = IMLREG_INVALID; -// fprReplace[2] = IMLREG_INVALID; -// fprReplace[3] = IMLREG_INVALID; -// // generate a mask of registers that we may not free -// sint32 numReplacedOperands = 0; -// uint32 unloadLockedMask = 0; -// for (sint32 f = 0; f < 5; f++) -// { -// IMLReg virtualFpr; -// if (f == 0) -// virtualFpr = registersUsed.readFPR1; -// else if (f == 1) -// virtualFpr = registersUsed.readFPR2; -// else if (f == 2) -// virtualFpr = registersUsed.readFPR3; -// else if (f == 3) -// virtualFpr = registersUsed.readFPR4; -// else if (f == 4) -// virtualFpr = registersUsed.writtenFPR1; -// if(virtualFpr.IsInvalid()) -// continue; -// cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64); -// cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64); -// cemu_assert_debug(virtualFpr.GetRegID() < 64); -// // check if this virtual FPR is already loaded in any real register -// ppcRecRegisterMapping_t* regMapping; -// if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1) -// { -// // not loaded -// // find available register -// while (true) -// { -// regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, ®istersUsed); -// if (regMapping == NULL) -// { -// // unload least recently used register and try again -// ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, ®istersUsed, unloadLockedMask); -// // mark as locked -// unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); -// // create unload instruction -// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); -// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; -// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); -// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping)); -// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; -// idx++; -// // update mapping -// unloadRegMapping->isActive = false; -// rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1; -// } -// else -// break; -// } -// // create load instruction -// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); -// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; -// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); -// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; -// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping)); -// imlInstructionTemp->op_r_name.name = 
ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()]; -// idx++; -// // update mapping -// regMapping->virtualReg = virtualFpr.GetRegID(); -// rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping); -// regMapping->lastUseIndex = rCtx.currentUseIndex; -// rCtx.currentUseIndex++; -// } -// else -// { -// regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()]; -// regMapping->lastUseIndex = rCtx.currentUseIndex; -// rCtx.currentUseIndex++; -// } -// // replace FPR -// bool entryFound = false; -// for (sint32 t = 0; t < numReplacedOperands; t++) -// { -// if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID()) -// { -// cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping)); -// entryFound = true; -// break; -// } -// } -// if (entryFound == false) -// { -// cemu_assert_debug(numReplacedOperands != 4); -// fprMatch[numReplacedOperands] = virtualFpr; -// fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping); -// numReplacedOperands++; -// } -// } -// if (numReplacedOperands > 0) -// { -// imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace); -// } -// // next -// idx++; -// } -// // count loaded registers -// sint32 numLoadedRegisters = 0; -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx.currentMapping[i].isActive) -// numLoadedRegisters++; -// } -// // store all loaded registers -// if (numLoadedRegisters > 0) -// { -// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters); -// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) -// { -// if (rCtx.currentMapping[i].isActive == false) -// continue; -// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; -// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); -// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; -// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; -// imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i); -// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; -// idx++; -// } -// } -// return true; -//} -// -//bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -//{ -// for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) -// { -// if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) -// return false; -// } -// return true; -//} - - -/* - * Returns true if the loaded value is guaranteed to be overwritten - */ -bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - IMLReg registerIndex = nameStoreInstruction->op_r_name.regR; - for(size_t i=startIndex; iimlList.size(); i++) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.readGPR1 == registerIndex || registersUsed.readGPR2 == registerIndex || registersUsed.readGPR3 == registerIndex ) - return false; - if (registersUsed.IsBaseGPRWritten(registerIndex)) - return true; - } - // todo: Scan next segment(s) - return false; -} - -/* - * Returns true if the loaded value is guaranteed to be overwritten - */ -bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* 
nameStoreInstruction, sint32 scanDepth) -{ - IMLRegID regId = nameStoreInstruction->op_r_name.regR.GetRegID(); - for(size_t i=startIndex; iimlList.size(); i++) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.readFPR1.IsValidAndSameRegID(regId) || registersUsed.readFPR2.IsValidAndSameRegID(regId) || registersUsed.readFPR3.IsValidAndSameRegID(regId) || registersUsed.readFPR4.IsValidAndSameRegID(regId)) - return false; - if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId) ) - return true; - } - // todo: Scan next segment(s) - return false; -} - -/* - * Returns true if the loaded name is never changed - */ -bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - IMLReg regR = nameStoreInstruction->op_r_name.regR; - for(sint32 i=startIndex; i>=0; i--) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.IsBaseGPRWritten(regR) ) - { - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) - return true; - return false; - } - } - return false; -} - -sint32 debugCallCounter1 = 0; - -/* - * Returns true if the name is overwritten in the current or any following segments - */ -bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - uint32 name = nameStoreInstruction->op_r_name.name; - for(size_t i=startIndex; iimlList.size(); i++) - { - const IMLInstruction& imlInstruction = imlSegment->imlList[i]; - if(imlInstruction.type == PPCREC_IML_TYPE_R_NAME ) - { - // name is loaded before being written - if (imlInstruction.op_r_name.name == name) - return false; - } - else if(imlInstruction.type == PPCREC_IML_TYPE_NAME_R ) - { - // name is written before being loaded - if (imlInstruction.op_r_name.name == name) - return true; - } - } - if( scanDepth >= 2 ) - return false; - if( imlSegment->nextSegmentIsUncertain ) - return false; - if( imlSegment->nextSegmentBranchTaken && PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, 0, nameStoreInstruction, scanDepth+1) == false ) - return false; - if( imlSegment->nextSegmentBranchNotTaken && PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, nameStoreInstruction, scanDepth+1) == false ) - return false; - if( imlSegment->nextSegmentBranchTaken == nullptr && imlSegment->nextSegmentBranchNotTaken == nullptr) - return false; - - return true; -} - -/* - * Returns true if the loaded FPR name is never changed - */ -bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth) -{ - IMLRegID regId = nameStoreInstruction->op_r_name.regR.GetRegID(); - for(sint32 i=startIndex; i>=0; i--) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLUsedRegisters registersUsed; - imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId)) - { - if(imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - return true; - return false; - } - } - // 
todo: Scan next segment(s) - return false; -} - void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg) { IMLRegID fprIndex = fprReg.GetRegID(); @@ -564,7 +83,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI * Keeps denormals and other special float values intact * Slightly improves performance */ -void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) +void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) { for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { @@ -648,7 +167,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp * Advantages: * Slightly improves performance */ -void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) +void IMLOptimizer_OptimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) { for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { @@ -809,133 +328,3 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) } } } - -///* -// * Returns true if registerWrite overwrites any of the registers read by registerRead -// */ -//bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite) -//{ -// if (registerWrite->writtenNamedReg1 < 0) -// return false; -// -// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1) -// return true; -// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2) -// return true; -// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3) -// return true; -// return false; -//} - -void _reorderConditionModifyInstructions(IMLSegment* imlSegment) -{ -// IMLInstruction* lastInstruction = imlSegment->GetLastInstruction(); -// // last instruction is a conditional branch? -// if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) -// return; -// if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) -// return; -// // get CR bitmask of bit required for conditional jump -// PPCRecCRTracking_t crTracking; -// IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking); -// uint32 requiredCRBits = crTracking.readCRBits; -// -// // scan backwards until we find the instruction that sets the CR -// sint32 crSetterInstructionIndex = -1; -// sint32 unsafeInstructionIndex = -1; -// for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) -// { -// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; -// IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking); -// if (crTracking.readCRBits != 0) -// return; // dont handle complex cases for now -// if (crTracking.writtenCRBits != 0) -// { -// if ((crTracking.writtenCRBits&requiredCRBits) != 0) -// { -// crSetterInstructionIndex = i; -// break; -// } -// else -// { -// return; // other CR bits overwritten (dont handle complex cases) -// } -// } -// // is safe? 
(no risk of overwriting x64 eflags) -// if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) || -// (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) || -// (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) || -// (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) -// continue; -// // not safe -// if (unsafeInstructionIndex == -1) -// unsafeInstructionIndex = i; -// } -// if (crSetterInstructionIndex < 0) -// return; -// if (unsafeInstructionIndex < 0) -// return; // no danger of overwriting eflags, don't reorder -// // check if we can move the CR setter instruction to after unsafeInstructionIndex -// PPCRecCRTracking_t crTrackingSetter = crTracking; -// IMLUsedRegisters regTrackingCRSetter; -// imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(®TrackingCRSetter); -// if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) -// return; // we don't handle FPR dependency yet so just ignore FPR instructions -// IMLUsedRegisters registerTracking; -// if (regTrackingCRSetter.writtenNamedReg1 >= 0) -// { -// // CR setter does write GPR -// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) -// { -// imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); -// // reads register written by CR setter? -// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®isterTracking, ®TrackingCRSetter)) -// { -// return; // cant move CR setter because of dependency -// } -// // writes register read by CR setter? -// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) -// { -// return; // cant move CR setter because of dependency -// } -// // overwrites register written by CR setter? -// if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1) -// return; -// } -// } -// else -// { -// // CR setter does not write GPR -// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) -// { -// imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); -// // writes register read by CR setter? 
-// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) -// { -// return; // cant move CR setter because of dependency -// } -// } -// } -// -// // move CR setter instruction -//#ifdef CEMU_DEBUG_ASSERT -// if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) -// assert_dbg(); -//#endif -// IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); -// memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); -// imlSegment->imlList[crSetterInstructionIndex].make_no_op(); -} - -/* - * Move instructions which update the condition flags closer to the instruction that consumes them - * On x64 this improves performance since we often can avoid storing CR in memory - */ -void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext) -{ - // check if this segment has a conditional branch - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - _reorderConditionModifyInstructions(segIt); - } -} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 8cdefe251..f722e7cac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -423,3 +423,4 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subr return cost; } + diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 70151422e..bf1868cf8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -71,34 +71,21 @@ struct PPCSegmentRegisterAllocatorInfo_t struct IMLSegment { sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) - sint32 startOffset{}; // offset to first instruction in iml instruction list - sint32 count{}; // number of instructions in segment + sint32 loopDepth{}; uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address) uint32 x64Offset{}; // x64 code offset of segment start - uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly) // list of intermediate instructions in this segment std::vector imlList; // segment link IMLSegment* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch IMLSegment* nextSegmentBranchTaken{}; bool nextSegmentIsUncertain{}; - sint32 loopDepth{}; std::vector list_prevSegments{}; - // PPC range of segment - uint32 ppcAddrMin{}; - uint32 ppcAddrMax{}; // enterable segments bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true - // PPC FPR use mask - //bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR - // CR use mask - uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten) - uint32 crBitsRead{}; // all bits that are read in this segment - uint32 crBitsWritten{}; // bits that are written in this segment // register allocator info PPCSegmentRegisterAllocatorInfo_t raInfo{}; - // segment state API void SetEnterable(uint32 enterAddress); void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst); diff --git 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 2d1535fcb..e61a7288d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -245,63 +245,13 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return ppcRecFunc; } -bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) +void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenContext) { - // isolate entry points from function flow (enterable segments must not be the target of any other segment) - // this simplifies logic during register allocation - PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); - - // if GQRs can be predicted, optimize PSQ load/stores - PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - - // insert name store instructions at the end of each segment but before branch instructions - //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - //{ - // if (segIt->imlList.size() == 0) - // continue; // ignore empty segments - // // analyze segment for register usage - // IMLUsedRegisters registersUsed; - // for (sint32 i = 0; i < segIt->imlList.size(); i++) - // { - // segIt->imlList[i].CheckRegisterUsage(®istersUsed); - // IMLReg accessedTempReg[5]; - // // intermediate FPRs - // accessedTempReg[0] = registersUsed.readFPR1; - // accessedTempReg[1] = registersUsed.readFPR2; - // accessedTempReg[2] = registersUsed.readFPR3; - // accessedTempReg[3] = registersUsed.readFPR4; - // accessedTempReg[4] = registersUsed.writtenFPR1; - // for (sint32 f = 0; f < 5; f++) - // { - // if (accessedTempReg[f].IsInvalid()) - // continue; - // uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()]; - // if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) - // { - // segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; - // } - // } - // } - //} - - // merge certain float load+store patterns (must happen before FPR register remapping) - PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext); - // delay byte swapping for certain load+store patterns - PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext); - - //if (numLoadedFPRRegisters > 0) - //{ - // if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) - // { - // return false; - // } - //} - IMLRegisterAllocatorParameters raParam; for (auto& it : ppcImlGenContext.mappedRegs) raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first); - + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX); gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX); @@ -335,6 +285,23 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14); IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); +} + +bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) +{ + // isolate entry points from function flow (enterable segments must not be the target of any other segment) + // this simplifies logic during register allocation + PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); + + // if GQRs can be predicted, optimize PSQ load/stores + PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); + + // merge certain float load+store patterns (must happen before FPR register remapping) + 
IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext); + // delay byte swapping for certain load+store patterns + IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext); + + PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext); //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); @@ -355,7 +322,7 @@ bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFun return false; } - // check if the current range got invalidated in the time it took to recompile it + // check if the current range got invalidated during the time it took to recompile it bool isInvalidated = false; for (auto& invRange : PPCRecompilerState.invalidationRanges) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index f474b0156..38a20a24e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -168,7 +168,7 @@ IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IML uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - __debugbreak(); + DEBUG_BREAK; //if( mappedName == PPCREC_NAME_NONE ) //{ // debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n"); @@ -187,7 +187,7 @@ uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcIml uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - __debugbreak(); + DEBUG_BREAK; //for(uint32 i=0; i<255; i++) //{ // if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) @@ -242,14 +242,6 @@ IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) */ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) { - //if( loadNew == false ) - //{ - // uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - // if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - //} - //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } @@ -259,11 +251,6 @@ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, */ IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - //uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - //if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } @@ -434,27 +421,38 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("MFCR: Not implemented\n"); - return false; - - //sint32 rD, rA, 
rB; - //PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0); - //return true; + sint32 rD, rA, rB; + PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, 0); + for (sint32 i = 0; i < 32; i++) + { + IMLReg regCrBit = _GetRegCR(ppcImlGenContext, i); + cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32); // addition is only allowed between same-format regs + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regD, regD, 1); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regD, regD, regCrBit); + } + return true; } bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("MTCRF: Not implemented\n"); - return false; - - //uint32 rS; - //uint32 crMask; - //PPC_OPC_TEMPL_XFX(opcode, rS, crMask); - //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask); - //return true; + uint32 rS; + uint32 crMask; + PPC_OPC_TEMPL_XFX(opcode, rS, crMask); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + uint32 crBitMask = ppc_MTCRFMaskToCRBitMask(crMask); + for (sint32 f = 0; f < 32; f++) + { + if(((crBitMask >> f) & 1) == 0) + continue; + IMLReg regCrBit = _GetRegCR(ppcImlGenContext, f); + cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regTmp, regS, (31-f)); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regCrBit, regTmp, 1); + } + return true; } void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned) From b685a08e60cfbef474d7c389a4da512ad7848972 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:48:53 +0100 Subject: [PATCH 41/64] Fix compile errors due to rebase --- .../HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp | 2 +- src/util/helpers/StringBuf.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 2fb55c801..42fe619b2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1190,7 +1190,7 @@ void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* { if (routeDepth >= 64) { - forceLogDebug_printf("Recompiler RA route maximum depth exceeded\n"); + cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded\n"); return; } route[routeDepth] = currentSegment; diff --git a/src/util/helpers/StringBuf.h b/src/util/helpers/StringBuf.h index 8b34e54f7..6242fa4cd 100644 --- a/src/util/helpers/StringBuf.h +++ b/src/util/helpers/StringBuf.h @@ -44,11 +44,9 @@ class StringBuf void add(std::string_view appendedStr) { - if (this->length + appendedStr.size() + 1 >= this->limit) - _reserve(std::max(this->length + appendedStr.size() + 64, this->limit + this->limit / 2)); size_t copyLen = appendedStr.size(); - if (remainingLen < copyLen) - copyLen = remainingLen; + if (this->length + copyLen + 1 >= this->limit) + 
_reserve(std::max(this->length + copyLen + 64, this->limit + this->limit / 2)); char* outputStart = (char*)(this->str + this->length); std::copy(appendedStr.data(), appendedStr.data() + copyLen, outputStart); length += copyLen; From cc730b4257a953db7305f892de56b636ce0938c6 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 13 Jan 2024 17:13:53 +0100 Subject: [PATCH 42/64] PPCRec: Dead code elimination + reintroduce pre-rework optimizations --- .../Recompiler/BackendX64/BackendX64.cpp | 50 ++ src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 3 + .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 561 +++++++++--------- .../Recompiler/IML/IMLInstruction.cpp | 20 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 54 +- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 466 +++++++++++++++ .../Recompiler/IML/IMLRegisterAllocator.cpp | 8 +- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 12 + .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 14 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 14 +- .../Recompiler/PPCRecompilerImlGen.cpp | 4 + 11 files changed, 910 insertions(+), 296 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index ffd635a03..67585ed2a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -82,6 +82,36 @@ X86Cond _x86Cond(IMLCondition imlCond) return X86_CONDITION_Z; } +X86Cond _x86CondInverted(IMLCondition imlCond) +{ + switch (imlCond) + { + case IMLCondition::EQ: + return X86_CONDITION_NZ; + case IMLCondition::NEQ: + return X86_CONDITION_Z; + case IMLCondition::UNSIGNED_GT: + return X86_CONDITION_BE; + case IMLCondition::UNSIGNED_LT: + return X86_CONDITION_NB; + case IMLCondition::SIGNED_GT: + return X86_CONDITION_LE; + case IMLCondition::SIGNED_LT: + return X86_CONDITION_NL; + default: + break; + } + cemu_assert_suspicious(); + return X86_CONDITION_Z; +} + +X86Cond _x86Cond(IMLCondition imlCond, bool condIsInverted) +{ + if (condIsInverted) + return _x86CondInverted(imlCond); + return _x86Cond(imlCond); +} + /* * Remember current instruction output offset for reloc * The instruction generated after this method has been called will be adjusted @@ -638,6 +668,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } + else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP) + { + x64GenContext->emitter->CMP_dd(regR, regA); + } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { if( regR != regA ) @@ -680,6 +714,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0); x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32); } + else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP) + { + sint32 imm = imlInstruction->op_r_immS32.immS32; + x64GenContext->emitter->CMP_di32(regR, imm); + } else { debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); @@ -1082,6 +1121,13 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, return true; } +void PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, 
IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + X86Cond cond = _x86Cond(imlInstruction->op_x86_eflags_jcc.cond, imlInstruction->op_x86_eflags_jcc.invertedCondition); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->Jcc_j32(cond, 0); +} + bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) { PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); @@ -1504,6 +1550,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) codeGenerationFailed = true; } + else if(imlInstruction->type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt); + } else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP) { if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index b58fdfa8d..98c48a849 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -11,6 +11,9 @@ void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGen void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext); + // debug +void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut); void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index d295f0aa8..192f06a1a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -75,12 +75,14 @@ void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) { - if (isLast) + if (val < 0) { - strOutput.addFmt("0x{:08x}", val); - return; + strOutput.add("-"); + val = -val; } - strOutput.addFmt("0x{:08x}, ", val); + strOutput.addFmt("0x{:08x}", val); + if (!isLast) + strOutput.add(", "); } void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset) @@ -163,323 +165,332 @@ std::string IMLDebug_GetConditionName(IMLCondition cond) return "ukn"; } -void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) +void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut) { - StringBuf strOutput(1024); - - strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); - if (imlSegment->isEnterable) - { - strOutput.addFmt(" ENTERABLE (0x{:08x})", 
imlSegment->enterPPCAddress); - } - //else if (imlSegment->isJumpDestination) - //{ - // strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); - //} - - debug_printf("%s\n", strOutput.c_str()); - - //strOutput.reset(); - //strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); - //debug_printf("%s", strOutput.c_str()); + const sint32 lineOffsetParameters = 10;//18; - if (printLivenessRangeInfo) - { - strOutput.reset(); - IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); - debug_printf("%s\n", strOutput.c_str()); - } - //debug_printf("\n"); + StringBuf strOutput(1024); strOutput.reset(); - - sint32 lineOffsetParameters = 18; - - for (sint32 i = 0; i < imlSegment->imlList.size(); i++) + if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) { - const IMLInstruction& inst = imlSegment->imlList[i]; - // don't log NOP instructions - if (inst.type == PPCREC_IML_TYPE_NO_OP) - continue; - strOutput.reset(); - strOutput.addFmt("{:02x} ", i); - if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) - { - if (inst.type == PPCREC_IML_TYPE_R_NAME) - strOutput.add("R_NAME"); - else - strOutput.add("NAME_R"); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + if (inst.type == PPCREC_IML_TYPE_R_NAME) + strOutput.add("R_NAME"); + else + strOutput.add("NAME_R"); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - if(inst.type == PPCREC_IML_TYPE_R_NAME) - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); + if(inst.type == PPCREC_IML_TYPE_R_NAME) + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); - strOutput.add("name_"); - if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) - { - strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) - { - strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) - { - strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) - strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); - else if (inst.op_r_name.name == PPCREC_NAME_XER_CA) - strOutput.add("xer.ca"); - else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) - strOutput.add("xer.so"); - else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) - strOutput.add("xer.ov"); - else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) - strOutput.add("cpuReservation.ea"); - else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) - strOutput.add("cpuReservation.value"); - else - { - strOutput.addFmt("name_ukn{}", inst.op_r_name.name); - } - if (inst.type != PPCREC_IML_TYPE_R_NAME) - { - strOutput.add(", "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true); - } - - } - else if (inst.type == PPCREC_IML_TYPE_R_R) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_R_R) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < 
lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) + strOutput.add("name_"); + if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true); + strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); } - else if (inst.type == PPCREC_IML_TYPE_COMPARE) + else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) { - strOutput.add("CMP "); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB); - strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare.cond)); - strOutput.add(" -> "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true); + strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); } - else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) + else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) { - strOutput.add("CMP "); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA); - strOutput.addFmt("{}", inst.op_compare_s32.immS32); - strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); - strOutput.add(" -> "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true); + strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); } - else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) + strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); + else if (inst.op_r_name.name == PPCREC_NAME_XER_CA) + strOutput.add("xer.ca"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) + strOutput.add("xer.so"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) + strOutput.add("xer.ov"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) + strOutput.add("cpuReservation.ea"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) + strOutput.add("cpuReservation.value"); + else { - strOutput.add("CJUMP "); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); - if (!inst.op_conditional_jump.mustBeTrue) - strOutput.add("(inverted)"); + strOutput.addFmt("name_ukn{}", inst.op_r_name.name); } - else if (inst.type == PPCREC_IML_TYPE_JUMP) + if (inst.type != PPCREC_IML_TYPE_R_NAME) { - strOutput.add("JUMP"); + strOutput.add(", "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true); } - else if (inst.type == PPCREC_IML_TYPE_R_R_S32) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < 
lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA); - IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + } + else if (inst.type == PPCREC_IML_TYPE_R_R) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_R) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB); + strOutput.addFmt("{}", IMLDebug_GetConditionName(inst.op_compare.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA); + strOutput.addFmt("{}", inst.op_compare_s32.immS32); + strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + strOutput.add("CJUMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); + if (!inst.op_conditional_jump.mustBeTrue) + strOutput.add("(inverted)"); + } + else if (inst.type == PPCREC_IML_TYPE_JUMP) + { + strOutput.add("JUMP"); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA); - IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_S32) - { - 
strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR); - IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); - } - else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || - inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - { - if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) - strOutput.add("LD_"); - else - strOutput.add("ST_"); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR); + IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); + } + else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || + inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + { + if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) + strOutput.add("LD_"); + else + strOutput.add("ST_"); - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); + IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); - if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2)); - else - strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32); - } - else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) - { - strOutput.add("ATOMIC_ST_U32"); + if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2)); + else + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32); + } + else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + strOutput.add("ATOMIC_ST_U32"); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + while 
((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); - } - else if (inst.type == PPCREC_IML_TYPE_NO_OP) + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); + } + else if (inst.type == PPCREC_IML_TYPE_NO_OP) + { + strOutput.add("NOP"); + } + else if (inst.type == PPCREC_IML_TYPE_MACRO) + { + if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) { - strOutput.add("NOP"); + strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg)); } - else if (inst.type == PPCREC_IML_TYPE_MACRO) + else if (inst.operation == PPCREC_IML_MACRO_BL) { - if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) - { - strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg)); - } - else if (inst.operation == PPCREC_IML_MACRO_BL) - { - strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); - } - else if (inst.operation == PPCREC_IML_MACRO_B_FAR) - { - strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); - } - else if (inst.operation == PPCREC_IML_MACRO_LEAVE) - { - strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); - } - else if (inst.operation == PPCREC_IML_MACRO_HLE) - { - strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); - } - else if (inst.operation == PPCREC_IML_MACRO_MFTB) - { - strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); - } - else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES) - { - strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); - } - else - { - strOutput.addFmt("MACRO ukn operation {}", inst.operation); - } + strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); } - else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) + else if (inst.operation == PPCREC_IML_MACRO_B_FAR) { - strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData)); - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode); - if (inst.op_storeLoad.flags2.notExpanded) - { - strOutput.addFmt(" "); - } + strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); } - else if (inst.type == PPCREC_IML_TYPE_FPR_STORE) + else if (inst.operation == PPCREC_IML_MACRO_LEAVE) { - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}]", 
inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); - strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode); + strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) + else if (inst.operation == PPCREC_IML_MACRO_HLE) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA)); + strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) + else if (inst.operation == PPCREC_IML_MACRO_MFTB) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC)); + strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) + else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB)); + strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); } - else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else { - strOutput.addFmt("CYCLE_CHECK"); + strOutput.addFmt("MACRO ukn operation {}", inst.operation); } - else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + } + else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) + { + strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData)); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode); + if (inst.op_storeLoad.flags2.notExpanded) { - strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR)); - bool displayAsHex = false; - if (inst.operation == PPCREC_IML_OP_ASSIGN) - { - displayAsHex = true; - strOutput.add("="); - } - else - strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); - if (displayAsHex) - strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); - else - strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); - strOutput.add(" (conditional)"); + strOutput.addFmt(" "); } + } + else if (inst.type == PPCREC_IML_TYPE_FPR_STORE) + { + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); else + strOutput.add("U"); + strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); + strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) + { + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA)); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + strOutput.addFmt("{:>6} ", 
IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC)); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) + { + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB)); + } + else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + strOutput.addFmt("CYCLE_CHECK"); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + { + strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR)); + bool displayAsHex = false; + if (inst.operation == PPCREC_IML_OP_ASSIGN) { - strOutput.addFmt("Unknown iml type {}", inst.type); + displayAsHex = true; + strOutput.add("="); } - debug_printf("%s", strOutput.c_str()); + else + strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); + if (displayAsHex) + strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); + else + strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); + strOutput.add(" (conditional)"); + } + else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond)); + } + else + { + strOutput.addFmt("Unknown iml type {}", inst.type); + } + disassemblyLineOut.assign(strOutput.c_str()); +} + +void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) +{ + StringBuf strOutput(1024); + + strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); + if (imlSegment->isEnterable) + { + strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); + } + if (imlSegment->deadCodeEliminationHintSeg) + { + strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg)); + } + debug_printf("%s\n", strOutput.c_str()); + + if (printLivenessRangeInfo) + { + strOutput.reset(); + IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); + debug_printf("%s\n", strOutput.c_str()); + } + //debug_printf("\n"); + strOutput.reset(); + + std::string disassemblyLine; + for (sint32 i = 0; i < imlSegment->imlList.size(); i++) + { + const IMLInstruction& inst = imlSegment->imlList[i]; + // don't log NOP instructions + if (inst.type == PPCREC_IML_TYPE_NO_OP) + continue; + //strOutput.addFmt("{:02x} ", i); + debug_printf(fmt::format("{:02x} ", i).c_str()); + disassemblyLine.clear(); + IMLDebug_DisassembleInstruction(inst, disassemblyLine); + debug_printf("%s", disassemblyLine.c_str()); if (printLivenessRangeInfo) { IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index f2476e612..53841bafc 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -26,7 +26,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_R_R) { - if (operation == PPCREC_IML_OP_DCBZ) + if (operation == PPCREC_IML_OP_DCBZ || + operation == PPCREC_IML_OP_X86_CMP) { // both operands are read only registersUsed->readGPR1 
= op_r_r.regR; @@ -58,13 +59,18 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (operation == PPCREC_IML_OP_LEFT_ROTATE) { - // operand register is read and write + // register operand is read and write registersUsed->readGPR1 = op_r_immS32.regR; registersUsed->writtenGPR1 = op_r_immS32.regR; } + else if (operation == PPCREC_IML_OP_X86_CMP) + { + // register operand is read only + registersUsed->readGPR1 = op_r_immS32.regR; + } else { - // operand register is write only + // register operand is write only // todo - use explicit lists, avoid default cases registersUsed->writtenGPR1 = op_r_immS32.regR; } @@ -453,6 +459,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readFPR1 = op_fpr_compare.regA; registersUsed->readFPR2 = op_fpr_compare.regB; } + else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + // no registers read or written (except for the implicit eflags) + } else { cemu_assert_unimplemented(); @@ -675,6 +685,10 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable); op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable); } + else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + // no registers read or written (except for the implicit eflags) + } else { cemu_assert_unimplemented(); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 817fef190..78863931c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -180,6 +180,11 @@ enum // R_R_R_carry PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1) + + // X86 extension + PPCREC_IML_OP_X86_CMP, // R_R and R_S32 + + PPCREC_IML_OP_INVALID }; #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) @@ -261,6 +266,9 @@ enum PPCREC_IML_TYPE_FPR_R, PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r* + + // X86 specific + PPCREC_IML_TYPE_X86_EFLAGS_JCC, }; enum // IMLName @@ -350,13 +358,29 @@ struct IMLUsedRegisters }; }; + bool IsWrittenByRegId(IMLRegID regId) const + { + if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId) + return true; + if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId) + return true; + return false; + } + bool IsBaseGPRWritten(IMLReg imlReg) const { cemu_assert_debug(imlReg.IsValid()); auto regId = imlReg.GetRegID(); - if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId) + return IsWrittenByRegId(regId); + } + + bool IsRegIdRead(IMLRegID regId) const + { + if (readGPR1.IsValid() && readGPR1.GetRegID() == regId) return true; - if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId) + if (readGPR2.IsValid() && readGPR2.GetRegID() == regId) + return true; + if (readGPR3.IsValid() && readGPR3.GetRegID() == regId) return true; return false; } @@ -556,6 +580,12 @@ struct IMLInstruction uint8 crBitIndex; bool bitMustBeSet; }op_conditional_r_s32; + // X86 specific + struct + { + IMLCondition cond; + bool invertedCondition; + }op_x86_eflags_jcc; }; bool IsSuffixInstruction() const @@ -568,7 +598,8 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP || - type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || + type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) return 
true; return false; } @@ -676,7 +707,7 @@ struct IMLInstruction void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) { this->type = PPCREC_IML_TYPE_COMPARE; - this->operation = -999; + this->operation = PPCREC_IML_OP_INVALID; this->op_compare.regR = regR; this->op_compare.regA = regA; this->op_compare.regB = regB; @@ -686,7 +717,7 @@ struct IMLInstruction void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond) { this->type = PPCREC_IML_TYPE_COMPARE_S32; - this->operation = -999; + this->operation = PPCREC_IML_OP_INVALID; this->op_compare_s32.regR = regR; this->op_compare_s32.regA = regA; this->op_compare_s32.immS32 = immS32; @@ -696,7 +727,7 @@ struct IMLInstruction void make_conditional_jump(IMLReg regBool, bool mustBeTrue) { this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; - this->operation = -999; + this->operation = PPCREC_IML_OP_INVALID; this->op_conditional_jump.registerBool = regBool; this->op_conditional_jump.mustBeTrue = mustBeTrue; } @@ -704,7 +735,7 @@ struct IMLInstruction void make_jump() { this->type = PPCREC_IML_TYPE_JUMP; - this->operation = -999; + this->operation = PPCREC_IML_OP_INVALID; } // load from memory @@ -753,6 +784,15 @@ struct IMLInstruction this->op_fpr_compare.cond = cond; } + /* X86 specific */ + void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition) + { + this->type = PPCREC_IML_TYPE_X86_EFLAGS_JCC; + this->operation = -999; + this->op_x86_eflags_jcc.cond = cond; + this->op_x86_eflags_jcc.invertedCondition = invertedCondition; + } + void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; void RewriteGPR(const std::unordered_map& translationTable); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index cdf922ce7..2856eb247 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,6 +6,11 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" +#include "Common/FileStream.h" + +#include +#include + IMLReg _FPRRegFromID(IMLRegID regId) { return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); @@ -328,3 +333,464 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) } } } + +// analyses register dependencies across the entire function +// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten) +class IMLOptimizerRegIOAnalysis +{ + public: + // constructor with segment pointer list as span + IMLOptimizerRegIOAnalysis(std::span segmentList, uint32 maxRegId) : m_segmentList(segmentList), m_maxRegId(maxRegId) + { + m_segRegisterInOutList.resize(segmentList.size()); + } + + struct IMLSegmentRegisterInOut + { + // todo - since our register ID range is usually pretty small (<64) we could use integer bitmasks to accelerate this? 
There is a helper class used in RA code already + std::unordered_set regWritten; // registers which are modified in this segment + std::unordered_set regImported; // registers which are read in this segment before they are written (importing value from previous segments) + std::unordered_set regForward; // registers which are not read or written in this segment, but are imported into a later segment (propagated info) + }; + + // calculate which registers are imported (read-before-written) and forwarded (read-before-written by a later segment) per segment + // then in a second step propagate the dependencies across linked segments + void ComputeDepedencies() + { + std::vector& segRegisterInOutList = m_segRegisterInOutList; + IMLSegmentRegisterInOut* segIO = segRegisterInOutList.data(); + uint32 index = 0; + for(auto& seg : m_segmentList) + { + seg->momentaryIndex = index; + index++; + for(auto& instr : seg->imlList) + { + IMLUsedRegisters registerUsage; + instr.CheckRegisterUsage(®isterUsage); + // registers are considered imported if they are read before being written in this seg + registerUsage.ForEachReadGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + if (!segIO->regWritten.contains(gprId)) + { + segIO->regImported.insert(gprId); + } + }); + registerUsage.ForEachWrittenGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + segIO->regWritten.insert(gprId); + }); + } + segIO++; + } + // for every exit segment, import all registers + for(auto& seg : m_segmentList) + { + if (!seg->nextSegmentIsUncertain) + continue; + if(seg->deadCodeEliminationHintSeg) + continue; + IMLSegmentRegisterInOut& segIO = segRegisterInOutList[seg->momentaryIndex]; + for(uint32 i=0; i<=m_maxRegId; i++) + { + segIO.regImported.insert((IMLRegID)i); + } + } + // broadcast dependencies across segment chains + std::unordered_set segIdsWhichNeedUpdate; + for (uint32 i = 0; i < m_segmentList.size(); i++) + { + segIdsWhichNeedUpdate.insert(i); + } + while(!segIdsWhichNeedUpdate.empty()) + { + auto firstIt = segIdsWhichNeedUpdate.begin(); + uint32 segId = *firstIt; + segIdsWhichNeedUpdate.erase(firstIt); + // forward regImported and regForward to earlier segments into their regForward, unless the register is written + auto& curSeg = m_segmentList[segId]; + IMLSegmentRegisterInOut& curSegIO = segRegisterInOutList[segId]; + for(auto& prevSeg : curSeg->list_prevSegments) + { + IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex]; + bool prevSegChanged = false; + for(auto& regId : curSegIO.regImported) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + for(auto& regId : curSegIO.regForward) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + if(prevSegChanged) + segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex); + } + // same for hint links + for(auto& prevSeg : curSeg->list_deadCodeHintBy) + { + IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex]; + bool prevSegChanged = false; + for(auto& regId : curSegIO.regImported) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + for(auto& regId : curSegIO.regForward) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + if(prevSegChanged) + segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex); + } + } + } + + std::unordered_set 
GetRegistersNeededAtEndOfSegment(IMLSegment& seg) + { + std::unordered_set regsNeeded; + if(seg.nextSegmentIsUncertain) + { + if(seg.deadCodeEliminationHintSeg) + { + auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + else + { + // add all regs + for(uint32 i = 0; i <= m_maxRegId; i++) + regsNeeded.insert(i); + } + return regsNeeded; + } + if(seg.nextSegmentBranchTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + if(seg.nextSegmentBranchNotTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + return regsNeeded; + } + + bool IsRegisterNeededAtEndOfSegment(IMLSegment& seg, IMLRegID regId) + { + if(seg.nextSegmentIsUncertain) + { + if(!seg.deadCodeEliminationHintSeg) + return true; + auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + return false; + } + if(seg.nextSegmentBranchTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + } + if(seg.nextSegmentBranchNotTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + } + return false; + } + + private: + std::span m_segmentList; + uint32 m_maxRegId; + + std::vector m_segRegisterInOutList; + +}; + +// scan backwards starting from index and return the index of the first found instruction which writes to the given register (by id) +sint32 IMLUtil_FindInstructionWhichWritesRegister(IMLSegment& seg, sint32 startIndex, IMLReg reg, sint32 maxScanDistance = -1) +{ + sint32 endIndex = std::max(startIndex - maxScanDistance, 0); + for (sint32 i = startIndex; i >= endIndex; i--) + { + IMLInstruction& imlInstruction = seg.imlList[i]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + if (registersUsed.IsBaseGPRWritten(reg)) + return i; + } + return -1; +} + +// returns true if the instruction can safely be moved while keeping ordering constraints and data dependencies intact +// initialIndex is inclusive, targetIndex is exclusive +bool IMLUtil_CanMoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex) +{ + boost::container::static_vector regsWritten; + boost::container::static_vector regsRead; + // get list of read and written registers + IMLUsedRegisters registersUsed; + seg.imlList[initialIndex].CheckRegisterUsage(®istersUsed); + registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) { + if (isWritten) + regsWritten.push_back(reg.GetRegID()); + else + regsRead.push_back(reg.GetRegID()); + }); + // check all the instructions inbetween + if(initialIndex < targetIndex) + { + sint32 scanStartIndex 
= initialIndex+1; // +1 to skip the moving instruction itself + sint32 scanEndIndex = targetIndex; + for (sint32 i = scanStartIndex; i < scanEndIndex; i++) + { + IMLUsedRegisters registersUsed; + seg.imlList[i].CheckRegisterUsage(®istersUsed); + // in order to be able to move an instruction past another instruction, any of the read registers must not be modified (written) + // and any of it's written registers must not be read + bool canMove = true; + registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) { + IMLRegID regId = reg.GetRegID(); + if (!isWritten) + canMove = canMove && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end(); + else + canMove = canMove && std::find(regsRead.begin(), regsRead.end(), regId) == regsRead.end(); + }); + if(!canMove) + return false; + } + } + else + { + cemu_assert_unimplemented(); // backwards scan is todo + return false; + } + return true; +} + +sint32 IMLUtil_CountRegisterReadsInRange(IMLSegment& seg, sint32 scanStartIndex, sint32 scanEndIndex, IMLRegID regId) +{ + cemu_assert_debug(scanStartIndex <= scanEndIndex); + cemu_assert_debug(scanEndIndex < seg.imlList.size()); + sint32 count = 0; + for (sint32 i = scanStartIndex; i <= scanEndIndex; i++) + { + IMLUsedRegisters registersUsed; + seg.imlList[i].CheckRegisterUsage(®istersUsed); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + if (reg.GetRegID() == regId) + count++; + }); + } + return count; +} + +// move instruction from one index to another +// instruction will be inserted before the instruction at targetIndex +// returns the new instruction index of the moved instruction +sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex) +{ + cemu_assert_debug(initialIndex != targetIndex); + IMLInstruction temp = seg.imlList[initialIndex]; + if (initialIndex < targetIndex) + { + cemu_assert_debug(targetIndex > 0); + targetIndex--; + std::copy_backward(seg.imlList.begin() + initialIndex + 1, seg.imlList.begin() + targetIndex + 1, seg.imlList.begin() + targetIndex); + seg.imlList[targetIndex] = temp; + return targetIndex; + } + else + { + cemu_assert_unimplemented(); // testing needed + std::copy(seg.imlList.begin() + targetIndex, seg.imlList.begin() + initialIndex, seg.imlList.begin() + targetIndex + 1); + seg.imlList[targetIndex] = temp; + return targetIndex; + } +} + +// x86 specific +bool IMLOptimizerX86_ModifiesEFlags(IMLInstruction& inst) +{ + // this is a very conservative implementation. 
There are more cases but this is good enough for now + if(inst.type == PPCREC_IML_TYPE_NAME_R || inst.type == PPCREC_IML_TYPE_R_NAME) + return false; + if((inst.type == PPCREC_IML_TYPE_R_R || inst.type == PPCREC_IML_TYPE_R_S32) && inst.operation == PPCREC_IML_OP_ASSIGN) + return false; + return true; // if we dont know for sure, assume it does +} + +void IMLOptimizer_DebugPrintSeg(ppcImlGenContext_t& ppcImlGenContext, IMLSegment& seg) +{ + printf("----------------\n"); + IMLDebug_DumpSegment(&ppcImlGenContext, &seg); + fflush(stdout); +} + +void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + // algorithm works like this: + // Calculate which registers need to be preserved at the end of each segment + // Then for each segment: + // - Iterate instructions backwards + // - Maintain a list of registers which are read at a later point (initially this is the list from the first step) + // - If an instruction only modifies registers which are not in the read list, then it is dead code and can be replaced with a no-op + + std::unordered_set regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg); + + // start with suffix instruction + if(seg.HasSuffixInstruction()) + { + IMLInstruction& imlInstruction = seg.imlList[seg.GetSuffixInstructionIndex()]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + regsNeeded.erase(reg.GetRegID()); + }); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + regsNeeded.insert(reg.GetRegID()); + }); + } + // iterate instructions backwards + for (sint32 i = seg.imlList.size() - (seg.HasSuffixInstruction() ? 2:1); i >= 0; i--) + { + IMLInstruction& imlInstruction = seg.imlList[i]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + // register read -> remove from overwritten list + // register written -> add to overwritten list + + // check if this instruction only writes registers which will never be read + bool onlyWritesRedundantRegisters = true; + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + if (regsNeeded.contains(reg.GetRegID())) + onlyWritesRedundantRegisters = false; + }); + // check if any of the written registers are read after this point + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + regsNeeded.erase(reg.GetRegID()); + }); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + regsNeeded.insert(reg.GetRegID()); + }); + // for now we only allow some instruction types to be deleted, eventually we should find a safer way to identify side effects that can't be judged by register usage alone + if(imlInstruction.type != PPCREC_IML_TYPE_R_R && imlInstruction.type != PPCREC_IML_TYPE_R_R_S32 && imlInstruction.type != PPCREC_IML_TYPE_COMPARE && imlInstruction.type != PPCREC_IML_TYPE_COMPARE_S32) + continue; + if(onlyWritesRedundantRegisters) + { + imlInstruction.make_no_op(); + } + } +} + +void IMLOptimizerX86_SubstituteCJumpForEflagsJump(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + // convert and optimize bool condition jumps to eflags condition jumps + // - Moves eflag setter (e.g. cmp) closer to eflags consumer (conditional jump) if necessary. 
If not possible but required then exit early + // - Since we only rely on eflags, the boolean register can be optimized out if DCE considers it unused + // - Further detect and optimize patterns like DEC + CMP + JCC into fused ops (todo) + + // check if this segment ends with a conditional jump + if(!seg.HasSuffixInstruction()) + return; + sint32 cjmpInstIndex = seg.GetSuffixInstructionIndex(); + if(cjmpInstIndex < 0) + return; + IMLInstruction& cjumpInstr = seg.imlList[cjmpInstIndex]; + if( cjumpInstr.type != PPCREC_IML_TYPE_CONDITIONAL_JUMP ) + return; + IMLReg regCondBool = cjumpInstr.op_conditional_jump.registerBool; + bool invertedCondition = !cjumpInstr.op_conditional_jump.mustBeTrue; + // find the instruction which sets the bool + sint32 cmpInstrIndex = IMLUtil_FindInstructionWhichWritesRegister(seg, cjmpInstIndex-1, regCondBool, 20); + if(cmpInstrIndex < 0) + return; + // check if its an instruction combo which can be optimized (currently only cmp + cjump) and get the condition + IMLInstruction& condSetterInstr = seg.imlList[cmpInstrIndex]; + IMLCondition cond; + if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE) + cond = condSetterInstr.op_compare.cond; + else if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE_S32) + cond = condSetterInstr.op_compare_s32.cond; + else + return; + // check if instructions inbetween modify eflags + sint32 indexEflagsSafeStart = -1; // index of the first instruction which does not modify eflags up to cjump + for(sint32 i = cjmpInstIndex-1; i > cmpInstrIndex; i--) + { + if(IMLOptimizerX86_ModifiesEFlags(seg.imlList[i])) + { + indexEflagsSafeStart = i+1; + break; + } + } + if(indexEflagsSafeStart >= 0) + { + cemu_assert(indexEflagsSafeStart > 0); + // there are eflags-modifying instructions inbetween the bool setter and cjump + // try to move the eflags setter close enough to the cjump (to indexEflagsSafeStart) + bool canMove = IMLUtil_CanMoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart); + if(!canMove) + { + return; + } + else + { + cmpInstrIndex = IMLUtil_MoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart); + } + } + // we can turn the jump into an eflags jump + cjumpInstr.make_x86_eflags_jcc(cond, invertedCondition); + + if (IMLUtil_CountRegisterReadsInRange(seg, cmpInstrIndex, cjmpInstIndex, regCondBool.GetRegID()) > 1 || regIoAnalysis.IsRegisterNeededAtEndOfSegment(seg, regCondBool.GetRegID())) + return; // bool register is used beyond the CMP, we can't drop it + + auto& cmpInstr = seg.imlList[cmpInstrIndex]; + cemu_assert_debug(cmpInstr.type == PPCREC_IML_TYPE_COMPARE || cmpInstr.type == PPCREC_IML_TYPE_COMPARE_S32); + if(cmpInstr.type == PPCREC_IML_TYPE_COMPARE) + { + IMLReg regA = cmpInstr.op_compare.regA; + IMLReg regB = cmpInstr.op_compare.regB; + seg.imlList[cmpInstrIndex].make_r_r(PPCREC_IML_OP_X86_CMP, regA, regB); + } + else + { + IMLReg regA = cmpInstr.op_compare_s32.regA; + sint32 val = cmpInstr.op_compare_s32.immS32; + seg.imlList[cmpInstrIndex].make_r_s32(PPCREC_IML_OP_X86_CMP, regA, val); + } + +} + +void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg); + + // x86 specific optimizations + IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis) +} + +void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext) +{ + 
IMLOptimizerRegIOAnalysis regIoAnalysis(ppcImlGenContext.segmentList2, ppcImlGenContext.GetMaxRegId()); + regIoAnalysis.ComputeDepedencies(); + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + IMLOptimizer_StandardOptimizationPassForSegment(regIoAnalysis, *segIt); + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 42fe619b2..96f8d9f0f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -75,14 +75,14 @@ bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex { if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex) { - currentSegment->raInfo.isPartOfProcessedLoop = _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase); + currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase); } } if (currentSegment->nextSegmentBranchTaken) { if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex) { - currentSegment->raInfo.isPartOfProcessedLoop = _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase); + currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase); } } if (currentSegment->raInfo.isPartOfProcessedLoop) @@ -341,8 +341,8 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment { // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) // algorithm goes as follows: - // 1) Iterate all instructions from beginning to end and keep a list of covering ranges - // 2) If we encounter an instruction with a fixed register we: + // 1) Iterate all instructions in the function from beginning to end and keep a list of active ranges for the currently iterated instruction + // 2) If we encounter an instruction with a fixed register requirement we: // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. 
RA will then schedule register allocation around that and avoid moves // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index bf1868cf8..f0420b011 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -81,6 +81,10 @@ struct IMLSegment IMLSegment* nextSegmentBranchTaken{}; bool nextSegmentIsUncertain{}; std::vector list_prevSegments{}; + // source for overwrite analysis (if nextSegmentIsUncertain is true) + // sometimes a segment is marked as an exit point, but for the purposes of dead code elimination we know the next segment + IMLSegment* deadCodeEliminationHintSeg{}; + std::vector list_deadCodeHintBy{}; // enterable segments bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true @@ -101,6 +105,14 @@ struct IMLSegment return nextSegmentBranchNotTaken; } + void SetNextSegmentForOverwriteHints(IMLSegment* seg) + { + cemu_assert_debug(!deadCodeEliminationHintSeg); + deadCodeEliminationHintSeg = seg; + if (seg) + seg->list_deadCodeHintBy.push_back(this); + } + // instruction API IMLInstruction* AppendInstruction(); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index e61a7288d..25a2c1635 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -139,7 +139,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP cemuLog_log(LogType::Force, "Attempting to recompile function outside of allowed code area"); return nullptr; } - uint32 codeGenRangeStart; uint32 codeGenRangeSize = 0; coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize); @@ -160,6 +159,7 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; + ppcImlGenContext.debug_entryPPCAddress = range.startAddress; bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker); if (compiledSuccessfully == false) { @@ -240,7 +240,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } - cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); + //cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); + + cemuLog_logDebug(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code); return ppcRecFunc; } @@ -301,11 +303,19 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) // delay byte swapping for certain load+store patterns IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext); + IMLOptimizer_StandardOptimizationPass(ppcImlGenContext); + PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext); 
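// Editorial note (not part of the patch): a minimal sketch of how a register I/O analysis
// like IMLOptimizerRegIOAnalysis could decide which successor segments to consult when
// computing "registers needed at end of segment", now that the new pass ordering above runs
// IMLOptimizer_StandardOptimizationPass before the register allocator. The helper name
// CollectSuccessorsForLiveness is hypothetical; the IMLSegment fields it reads
// (nextSegmentIsUncertain, deadCodeEliminationHintSeg, nextSegmentBranchTaken,
// nextSegmentBranchNotTaken) are the ones added or referenced in the hunks above.
static void CollectSuccessorsForLiveness(IMLSegment* seg, std::vector<IMLSegment*>& out)
{
	if (seg->nextSegmentIsUncertain)
	{
		// uncertain exit (e.g. the LEAVE segment created by the cycle-check split):
		// fall back to the dead-code-elimination hint segment if one was registered,
		// so DCE only has to be fully conservative when no hint exists
		if (seg->deadCodeEliminationHintSeg)
			out.push_back(seg->deadCodeEliminationHintSeg);
		return;
	}
	if (seg->nextSegmentBranchTaken)
		out.push_back(seg->nextSegmentBranchTaken);
	if (seg->nextSegmentBranchNotTaken)
		out.push_back(seg->nextSegmentBranchNotTaken);
}
// With successors resolved this way, GetRegistersNeededAtEndOfSegment() can union the
// inputs of each collected segment, which is what keeps the dead code elimination in
// IMLOptimizer_RemoveDeadCodeFromSegment() from discarding writes that an exit path still reads.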
//PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); +// if(ppcImlGenContext.debug_entryPPCAddress == 0x0200E1E8) +// { +// IMLDebug_Dump(&ppcImlGenContext); +// __debugbreak(); +// } + return true; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 080ce2fa0..94b3fcd97 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -41,13 +41,15 @@ struct ppcImlGenContext_t bool PSE{ true }; // cycle counter uint32 cyclesSinceLastBranch; // used to track ppc cycles - // temporary general purpose registers - //uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; - // temporary floating point registers (single and double precision) - //uint32 mappedFPRRegister[256]; - std::unordered_map mappedRegs; + uint32 GetMaxRegId() const + { + if (mappedRegs.empty()) + return 0; + return mappedRegs.size()-1; + } + // list of segments std::vector segmentList2; // code generation control @@ -62,6 +64,8 @@ struct ppcImlGenContext_t { bool modifiesGQR[8]; }tracking; + // debug helpers + uint32 debug_entryPPCAddress{0}; ~ppcImlGenContext_t() { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 38a20a24e..b89b7f7c7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2935,6 +2935,10 @@ void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, P splitSeg->SetLinkBranchTaken(exitSegment); exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0, IMLREG_INVALID); + + cemu_assert_debug(splitSeg->nextSegmentBranchNotTaken); + // let the IML optimizer and RA know that the original segment should be used during analysis for dead code elimination + exitSegment->SetNextSegmentForOverwriteHints(splitSeg->nextSegmentBranchNotTaken); } void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext) From 450c0a529cb1866a5e76fd4d83ac207f8f0c611d Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sun, 1 Sep 2024 02:52:45 +0200 Subject: [PATCH 43/64] PPCRec: Simplify RA code and clean it up a bit --- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 27 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 409 +++++++++--------- .../IML/IMLRegisterAllocatorRanges.cpp | 288 ++++++------ .../IML/IMLRegisterAllocatorRanges.h | 77 +++- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 55 +-- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 15 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 5 - 7 files changed, 464 insertions(+), 412 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 192f06a1a..cca8b61e4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -94,23 +94,12 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml debug_printf(" "); index++; } - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { if (offset == subrangeItr->start.index) { - if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index) - { - debug_printf("*%-2d", 
subrangeItr->range->virtualRegister); - } - else - { - debug_printf("|%-2d", subrangeItr->range->virtualRegister); - } - } - else if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index ) - { - debug_printf("* "); + debug_printf("|%-2d", subrangeItr->GetVirtualRegister()); } else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index) { @@ -122,7 +111,7 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml } index += 3; // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } } @@ -501,19 +490,19 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool if (printLivenessRangeInfo) { debug_printf("Ranges-VirtReg "); - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - debug_printf("v%-2d", subrangeItr->range->virtualRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + debug_printf("v%-2d", subrangeItr->GetVirtualRegister()); + subrangeItr = subrangeItr->link_allSegmentRanges.next; } debug_printf("\n"); debug_printf("Ranges-PhysReg "); subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - debug_printf("p%-2d", subrangeItr->range->physicalRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + debug_printf("p%-2d", subrangeItr->GetPhysicalRegister()); + subrangeItr = subrangeItr->link_allSegmentRanges.next; } debug_printf("\n"); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 96f8d9f0f..a59b88bd2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -50,10 +50,9 @@ struct IMLRegisterAllocatorContext }; -uint32 recRACurrentIterationIndex = 0; - uint32 PPCRecRA_getNextIterationIndex() { + static uint32 recRACurrentIterationIndex = 0; recRACurrentIterationIndex++; return recRACurrentIterationIndex; } @@ -120,7 +119,7 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml #define SUBRANGE_LIST_SIZE (128) -sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessSubrange_t* subrange, sint32 startIndex) +sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessRange* subrange, sint32 startIndex) { for (sint32 i = 0; i < subrange->list_locations.size(); i++) { @@ -135,12 +134,12 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe { sint32 minDistance = INT_MAX; // next - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { - if (subrangeItr->range->physicalRegister != physRegister) + if (subrangeItr->GetPhysicalRegister() != physRegister) { - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } if (startIndex >= subrangeItr->start.index && startIndex < subrangeItr->end.index) @@ -149,7 +148,7 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe { minDistance = std::min(minDistance, (subrangeItr->start.index - startIndex)); } - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } return minDistance; } @@ -175,7 +174,7 
@@ struct IMLRALivenessTimeline } // manually add an active range - void AddActiveRange(raLivenessSubrange_t* subrange) + void AddActiveRange(raLivenessRange* subrange) { activeRanges.emplace_back(subrange); } @@ -187,7 +186,7 @@ struct IMLRALivenessTimeline size_t count = activeRanges.size(); for (size_t f = 0; f < count; f++) { - raLivenessSubrange_t* liverange = activeRanges[f]; + raLivenessRange* liverange = activeRanges[f]; if (liverange->end.index <= instructionIndex) { #ifdef CEMU_DEBUG_ASSERT @@ -205,18 +204,18 @@ struct IMLRALivenessTimeline activeRanges.resize(count); } - std::span GetExpiredRanges() + std::span GetExpiredRanges() { return { expiredRanges.data(), expiredRanges.size() }; } - boost::container::small_vector activeRanges; + boost::container::small_vector activeRanges; private: - boost::container::small_vector expiredRanges; + boost::container::small_vector expiredRanges; }; -bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB) +bool IsRangeOverlapping(raLivenessRange* rangeA, raLivenessRange* rangeB) { if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index) return true; @@ -228,39 +227,40 @@ bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rang } // mark occupied registers by any overlapping range as unavailable in physRegSet -void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet) +void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet) { - for (auto& subrange : range->list_subranges) + auto clusterRanges = range2->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) { IMLSegment* imlSegment = subrange->imlSegment; - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { if (subrange == subrangeItr) { // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } if(IsRangeOverlapping(subrange, subrangeItr)) { - if (subrangeItr->range->physicalRegister >= 0) - physRegSet.SetReserved(subrangeItr->range->physicalRegister); + if (subrangeItr->GetPhysicalRegister() >= 0) + physRegSet.SetReserved(subrangeItr->GetPhysicalRegister()); } // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } } } -bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; } +bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->start.index < rhs->start.index; } void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) { - raLivenessSubrange_t* subrangeList[4096+1]; + raLivenessRange* subrangeList[4096+1]; sint32 count = 0; // disassemble linked list - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { if (count >= 4096) @@ -268,7 +268,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) subrangeList[count] = subrangeItr; count++; // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } if (count == 0) { @@ -280,12 +280,12 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) // 
reassemble linked list subrangeList[count] = nullptr; imlSegment->raInfo.linkedList_allSubranges = subrangeList[0]; - subrangeList[0]->link_segmentSubrangesGPR.prev = nullptr; - subrangeList[0]->link_segmentSubrangesGPR.next = subrangeList[1]; + subrangeList[0]->link_allSegmentRanges.prev = nullptr; + subrangeList[0]->link_allSegmentRanges.next = subrangeList[1]; for (sint32 i = 1; i < count; i++) { - subrangeList[i]->link_segmentSubrangesGPR.prev = subrangeList[i - 1]; - subrangeList[i]->link_segmentSubrangesGPR.next = subrangeList[i + 1]; + subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1]; + subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1]; } // validate list #ifdef CEMU_DEBUG_ASSERT @@ -299,40 +299,40 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) assert_dbg(); currentStartIndex = subrangeItr->start.index; // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } if (count != count2) assert_dbg(); #endif } -std::unordered_map& IMLRA_GetSubrangeMap(IMLSegment* imlSegment) +std::unordered_map& IMLRA_GetSubrangeMap(IMLSegment* imlSegment) { - return imlSegment->raInfo.linkedList_perVirtualGPR2; + return imlSegment->raInfo.linkedList_perVirtualRegister; } -raLivenessSubrange_t* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId) +raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId) { - auto it = imlSegment->raInfo.linkedList_perVirtualGPR2.find(regId); - if (it == imlSegment->raInfo.linkedList_perVirtualGPR2.end()) + auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId); + if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end()) return nullptr; return it->second; } -raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) +raLivenessRange* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) { uint32 regId = regToSearch.GetRegID(); - raLivenessSubrange_t* subrangeItr = IMLRA_GetSubrange(imlSegment, regId); + raLivenessRange* subrangeItr = IMLRA_GetSubrange(imlSegment, regId); while (subrangeItr) { if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex) return subrangeItr; - subrangeItr = subrangeItr->link_sameVirtualRegisterGPR.next; + subrangeItr = subrangeItr->link_sameVirtualRegister.next; } return nullptr; } -void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessSubrange_t* subrange, sint32 instructionIndex) +void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessRange* subrange, sint32 instructionIndex) { DEBUG_BREAK; } @@ -381,42 +381,42 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon _sortSegmentAllSubrangesLinkedList(imlSegment); IMLRALivenessTimeline livenessTimeline; - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { sint32 currentIndex = subrangeItr->start.index; PPCRecRA_debugValidateSubrange(subrangeItr); livenessTimeline.ExpireRanges(std::min(currentIndex, RA_INTER_RANGE_END-1)); // expire up to currentIndex (inclusive), but exclude infinite ranges // if subrange already has register assigned then add it to the active list and continue - if 
(subrangeItr->range->physicalRegister >= 0) + if (subrangeItr->GetPhysicalRegister() >= 0) { // verify if register is actually available #ifdef CEMU_DEBUG_ASSERT for (auto& liverangeItr : livenessTimeline.activeRanges) { // check for register mismatch - cemu_assert_debug(liverangeItr->range->physicalRegister != subrangeItr->range->physicalRegister); + cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister()); } #endif livenessTimeline.AddActiveRange(subrangeItr); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } // find free register for current subrangeItr and segment - IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->range->virtualRegister); + IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister()); IMLPhysRegisterSet physRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat); cemu_assert_debug(physRegSet.HasAnyAvailable()); // register uses type with no valid pool for (auto& liverangeItr : livenessTimeline.activeRanges) { - cemu_assert_debug(liverangeItr->range->physicalRegister >= 0); - physRegSet.SetReserved(liverangeItr->range->physicalRegister); + cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0); + physRegSet.SetReserved(liverangeItr->GetPhysicalRegister()); } // check intersections with other ranges and determine allowed registers IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments) if(physRegSet.HasAnyAvailable()) { // check globally in all segments - PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr->range, physRegSet); + PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, physRegSet); } if (!physRegSet.HasAnyAvailable()) { @@ -427,7 +427,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon struct { sint32 distance; - raLivenessSubrange_t* largestHoleSubrange; + raLivenessRange* largestHoleSubrange; sint32 cost; // additional cost of choosing this candidate }localRangeHoleCutting; // split current range (this is generally only a good choice when the current range is long but rarely used) @@ -440,7 +440,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon // explode a inter-segment range (prefer ranges that are not read/written in this segment) struct { - raLivenessRange_t* range; + raLivenessRange* range; sint32 cost; sint32 distance; // size of hole // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange @@ -540,7 +540,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon if( distance < 2) continue; sint32 cost; - cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); + cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); // if the hole is not large enough, add cost of splitting current subrange if (distance < requiredSize) { @@ -553,7 +553,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon { spillStrategies.explodeRange.cost = cost; spillStrategies.explodeRange.distance = distance; - spillStrategies.explodeRange.range = candidate->range; + spillStrategies.explodeRange.range = candidate; } } // choose strategy @@ -581,7 +581,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon else if 
(subrangeItr->start.index == RA_INTER_RANGE_START) { // alternative strategy if we have no other choice: explode current range - PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr->range); + PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr); } else assert_dbg(); @@ -603,27 +603,27 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon if (candidate->end.index != RA_INTER_RANGE_END) continue; // only select candidates that clash with current subrange - if (candidate->range->physicalRegister < 0 && candidate != subrangeItr) + if (candidate->GetPhysicalRegister() < 0 && candidate != subrangeItr) continue; sint32 cost; - cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); + cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); // compare with current best candidate for this strategy if (cost < spillStrategies.explodeRange.cost) { spillStrategies.explodeRange.cost = cost; spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = candidate->range; + spillStrategies.explodeRange.range = candidate; } } // add current range as a candidate too sint32 ownCost; - ownCost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(subrangeItr->range); + ownCost = PPCRecRARange_estimateCostAfterRangeExplode(subrangeItr); if (ownCost < spillStrategies.explodeRange.cost) { spillStrategies.explodeRange.cost = ownCost; spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = subrangeItr->range; + spillStrategies.explodeRange.range = subrangeItr; } if (spillStrategies.explodeRange.cost == INT_MAX) assert_dbg(); // should not happen @@ -632,10 +632,11 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon return false; } // assign register to range - subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg(); + //subrangeItr->SetPhysicalRegister(physRegSet.GetFirstAvailableReg()); + subrangeItr->SetPhysicalRegisterForCluster(physRegSet.GetFirstAvailableReg()); livenessTimeline.AddActiveRange(subrangeItr); // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } return true; } @@ -673,137 +674,30 @@ void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* } } -struct subrangeEndingInfo_t -{ - //boost::container::small_vector subrangeList2; - raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE]; - sint32 subrangeCount; - - bool hasUndefinedEndings; -}; - -void _findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) -{ - if (depth >= 30) - { - info->hasUndefinedEndings = true; - return; - } - if (subrange->lastIterationIndex == iterationIndex) - return; // already processed - subrange->lastIterationIndex = iterationIndex; - if (subrange->hasStoreDelayed) - return; // no need to traverse this subrange - IMLSegment* imlSegment = subrange->imlSegment; - if (subrange->end.index != RA_INTER_RANGE_END) - { - // ending segment - if (info->subrangeCount >= SUBRANGE_LIST_SIZE) - { - info->hasUndefinedEndings = true; - return; - } - else - { - info->subrangeList[info->subrangeCount] = subrange; - info->subrangeCount++; - } - return; - } - - // traverse next subranges in flow - if (imlSegment->nextSegmentBranchNotTaken) - { - if (subrange->subrangeBranchNotTaken == nullptr) - { - info->hasUndefinedEndings = true; - } - else - { - _findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, 
iterationIndex, depth + 1, info); - } - } - if (imlSegment->nextSegmentBranchTaken) - { - if (subrange->subrangeBranchTaken == nullptr) - { - info->hasUndefinedEndings = true; - } - else - { - _findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info); - } - } -} - -void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange) -{ - if (subrange->end.index != RA_INTER_RANGE_END) - return; - // analyze data flow across segments (if this segment has writes) - if (subrange->hasStore) - { - subrangeEndingInfo_t writeEndingInfo; - writeEndingInfo.subrangeCount = 0; - writeEndingInfo.hasUndefinedEndings = false; - _findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo); - if (writeEndingInfo.hasUndefinedEndings == false) - { - // get cost of delaying store into endings - sint32 delayStoreCost = 0; - bool alreadyStoredInAllEndings = true; - for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) - { - raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i]; - if( subrangeItr->hasStore ) - continue; // this ending already stores, no extra cost - alreadyStoredInAllEndings = false; - sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment); - delayStoreCost = std::max(storeCost, delayStoreCost); - } - if (alreadyStoredInAllEndings) - { - subrange->hasStore = false; - subrange->hasStoreDelayed = true; - } - else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment)) - { - subrange->hasStore = false; - subrange->hasStoreDelayed = true; - for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) - { - raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i]; - subrangeItr->hasStore = true; - } - } - } - } -} - inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) { return IMLReg(baseFormat, baseFormat, 0, regId); } -void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span loadList) +void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span loadList) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadList.size()); for (sint32 i = 0; i < loadList.size(); i++) { - IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->range->virtualRegister]; + IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->GetVirtualRegister()]; cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); - imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->range->physicalRegister), loadList[i]->range->name); + imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->GetPhysicalRegister()), loadList[i]->GetName()); } } -void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span storeList) +void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span storeList) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeList.size()); for (size_t i = 0; i < storeList.size(); i++) { - IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->range->virtualRegister]; + IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->GetVirtualRegister()]; cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); - imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->range->name, 
_MakeNativeReg(baseFormat, storeList[i]->range->physicalRegister)); + imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->GetName(), _MakeNativeReg(baseFormat, storeList[i]->GetPhysicalRegister())); } } @@ -814,7 +708,7 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; // load register ranges that are supplied from previous segments - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { if (subrangeItr->start.index == RA_INTER_RANGE_START) @@ -827,12 +721,12 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML assert_dbg(); } // update translation table - cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->range->virtualRegister)); + cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->GetVirtualRegister())); #endif - virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); } // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } // process instructions while(index < imlSegment->imlList.size() + 1) @@ -842,7 +736,7 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML for (auto& expiredRange : livenessTimeline.GetExpiredRanges()) { // update translation table - virtId2PhysRegIdMap.erase(expiredRange->range->virtualRegister); + virtId2PhysRegIdMap.erase(expiredRange->GetVirtualRegister()); // store GPR if required // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (expiredRange->hasStore) @@ -874,9 +768,9 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML subrangeItr->start.index--; } // update translation table - virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.insert_or_assign(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); } - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } // rewrite registers if (index < imlSegment->imlList.size()) @@ -885,12 +779,12 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML index++; } // expire infinite subranges (subranges which cross the segment border) - std::vector loadStoreList; + std::vector loadStoreList; livenessTimeline.ExpireRanges(RA_INTER_RANGE_END); for (auto liverange : livenessTimeline.GetExpiredRanges()) { // update translation table - virtId2PhysRegIdMap.erase(liverange->range->virtualRegister); + virtId2PhysRegIdMap.erase(liverange->GetVirtualRegister()); // store GPR if (liverange->hasStore) loadStoreList.emplace_back(liverange); @@ -910,10 +804,10 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML if (subrangeItr->_noLoad == false) loadStoreList.emplace_back(subrangeItr); // update translation table - virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); } // next - 
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } if (!loadStoreList.empty()) PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList); @@ -1026,7 +920,7 @@ void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) } } -raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) +raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name) { IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR); if (!abstractRange) @@ -1034,7 +928,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext if (abstractRange->isProcessed) { // return already existing segment - raLivenessSubrange_t* existingRange = IMLRA_GetSubrange(imlSegment, vGPR); + raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR); cemu_assert_debug(existingRange); return existingRange; } @@ -1043,7 +937,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext #ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr); #endif - raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, range, imlSegment, abstractRange->usageStart, abstractRange->usageEnd); + raLivenessRange* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, imlSegment, vGPR, name, abstractRange->usageStart, abstractRange->usageEnd); // traverse forward if (abstractRange->usageEnd == RA_INTER_RANGE_END) { @@ -1052,7 +946,8 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR); if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START) { - subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, range); + subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name); + subrange->subrangeBranchTaken->previousRanges.push_back(subrange); cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); } } @@ -1061,7 +956,8 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR); if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START) { - subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, range); + subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name); + subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange); cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); } } @@ -1075,7 +971,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext if(!prevRange) continue; if (prevRange->usageEnd == RA_INTER_RANGE_END) - PPCRecRA_convertToMappedRanges(ctx, it, vGPR, range); + PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name); } } // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction @@ -1100,13 +996,12 @@ 
void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML if(it.second.isProcessed) continue; IMLRegID regId = it.first; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.raParam->regIdToName.find(regId)->second); - PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range); + PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second); } // fill created ranges with read/write location indices // note that at this point there is only one range per register per segment // and the algorithm below relies on this - const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); + const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); size_t index = 0; IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) @@ -1114,7 +1009,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { IMLRegID gprId = gprReg.GetRegID(); - raLivenessSubrange_t* subrange = regToSubrange.find(gprId)->second; + raLivenessRange* subrange = regToSubrange.find(gprId)->second; PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT if ((sint32)index < subrange->start.index) @@ -1351,7 +1246,7 @@ void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ct IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt); } -void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) +void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessRange* subrange) { bool isRead = false; bool isWritten = false; @@ -1376,23 +1271,135 @@ void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) subrange->_noLoad = true; } + +struct subrangeEndingInfo_t +{ + //boost::container::small_vector subrangeList2; + raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE]; + sint32 subrangeCount; + + bool hasUndefinedEndings; +}; + +void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) +{ + if (depth >= 30) + { + info->hasUndefinedEndings = true; + return; + } + if (subrange->lastIterationIndex == iterationIndex) + return; // already processed + subrange->lastIterationIndex = iterationIndex; + if (subrange->hasStoreDelayed) + return; // no need to traverse this subrange + IMLSegment* imlSegment = subrange->imlSegment; + if (subrange->end.index != RA_INTER_RANGE_END) + { + // ending segment + if (info->subrangeCount >= SUBRANGE_LIST_SIZE) + { + info->hasUndefinedEndings = true; + return; + } + else + { + info->subrangeList[info->subrangeCount] = subrange; + info->subrangeCount++; + } + return; + } + + // traverse next subranges in flow + if (imlSegment->nextSegmentBranchNotTaken) + { + if (subrange->subrangeBranchNotTaken == nullptr) + { + info->hasUndefinedEndings = true; + } + else + { + _findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info); + } + } + if (imlSegment->nextSegmentBranchTaken) + { + if (subrange->subrangeBranchTaken == nullptr) + { + info->hasUndefinedEndings = true; + } + else + { + _findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info); + } + } +} + +static void _analyzeRangeDataFlow(raLivenessRange* subrange) +{ + if (subrange->end.index != RA_INTER_RANGE_END) + return; + // analyze data flow across 
segments (if this segment has writes) + if (subrange->hasStore) + { + subrangeEndingInfo_t writeEndingInfo; + writeEndingInfo.subrangeCount = 0; + writeEndingInfo.hasUndefinedEndings = false; + _findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo); + if (writeEndingInfo.hasUndefinedEndings == false) + { + // get cost of delaying store into endings + sint32 delayStoreCost = 0; + bool alreadyStoredInAllEndings = true; + for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) + { + raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i]; + if( subrangeItr->hasStore ) + continue; // this ending already stores, no extra cost + alreadyStoredInAllEndings = false; + sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment); + delayStoreCost = std::max(storeCost, delayStoreCost); + } + if (alreadyStoredInAllEndings) + { + subrange->hasStore = false; + subrange->hasStoreDelayed = true; + } + else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment)) + { + subrange->hasStore = false; + subrange->hasStoreDelayed = true; + for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) + { + raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i]; + subrangeItr->hasStore = true; + } + } + } + } +} + void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) { - // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore - // first do a per-subrange pass - for (auto& range : ppcImlGenContext->raInfo.list_ranges) + // this function is called after _assignRegisters(), which means that all liveness ranges are already final and must not be changed anymore + // in the first pass we track read/write dependencies + for(auto& seg : ppcImlGenContext->segmentList2) { - for (auto& subrange : range->list_subranges) + raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; + while(subrange) { PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); + subrange = subrange->link_allSegmentRanges.next; } } - // then do a second pass where we scan along subrange flow - for (auto& range : ppcImlGenContext->raInfo.list_ranges) + // then we do a second pass where we scan along subrange flow + for(auto& seg : ppcImlGenContext->segmentList2) { - for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm + raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; + while(subrange) { _analyzeRangeDataFlow(subrange); + subrange = subrange->link_allSegmentRanges.next; } } } @@ -1407,8 +1414,6 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment - ppcImlGenContext->raInfo.list_ranges = std::vector(); - ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); IMLRA_CalculateLivenessRanges(ctx); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index f722e7cac..602cdfa77 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -3,45 +3,110 @@ #include "IMLRegisterAllocatorRanges.h" #include "util/helpers/MemoryPool.h" -void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessSubrange_t* subrange) +uint32 
PPCRecRA_getNextIterationIndex(); + +IMLRegID raLivenessRange::GetVirtualRegister() const +{ + return virtualRegister; +} + +sint32 raLivenessRange::GetPhysicalRegister() const +{ + return physicalRegister; +} + +IMLName raLivenessRange::GetName() const +{ + return name; +} + +void raLivenessRange::SetPhysicalRegister(sint32 physicalRegister) { - IMLRegID regId = subrange->range->virtualRegister; + cemu_assert_suspicious(); // not used yet + this->physicalRegister = physicalRegister; +} + +void raLivenessRange::SetPhysicalRegisterForCluster(sint32 physicalRegister) +{ + auto clusterRanges = GetAllSubrangesInCluster(); + for(auto& range : clusterRanges) + range->physicalRegister = physicalRegister; +} + +boost::container::small_vector raLivenessRange::GetAllSubrangesInCluster() +{ + uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); + boost::container::small_vector subranges; + subranges.push_back(this); + this->lastIterationIndex = iterationIndex; + size_t i = 0; + while(isubrangeBranchTaken && cur->subrangeBranchTaken->lastIterationIndex != iterationIndex) + { + cur->subrangeBranchTaken->lastIterationIndex = iterationIndex; + subranges.push_back(cur->subrangeBranchTaken); + } + if(cur->subrangeBranchNotTaken && cur->subrangeBranchNotTaken->lastIterationIndex != iterationIndex) + { + cur->subrangeBranchNotTaken->lastIterationIndex = iterationIndex; + subranges.push_back(cur->subrangeBranchNotTaken); + } + // check predecessors + for(auto& prev : cur->previousRanges) + { + if(prev->lastIterationIndex != iterationIndex) + { + prev->lastIterationIndex = iterationIndex; + subranges.push_back(prev); + } + } + } + return subranges; +} + +void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) +{ + IMLRegID regId = subrange->GetVirtualRegister(); auto it = root.find(regId); if (it == root.end()) { // new single element root.try_emplace(regId, subrange); - subrange->link_sameVirtualRegisterGPR.prev = nullptr; - subrange->link_sameVirtualRegisterGPR.next = nullptr; + subrange->link_sameVirtualRegister.prev = nullptr; + subrange->link_sameVirtualRegister.next = nullptr; } else { // insert in first position - subrange->link_sameVirtualRegisterGPR.next = it->second; + subrange->link_sameVirtualRegister.next = it->second; it->second = subrange; - subrange->link_sameVirtualRegisterGPR.prev = subrange; + subrange->link_sameVirtualRegister.prev = subrange; } } -void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) +void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange) { - subrange->link_segmentSubrangesGPR.next = *root; + subrange->link_allSegmentRanges.next = *root; if (*root) - (*root)->link_segmentSubrangesGPR.prev = subrange; - subrange->link_segmentSubrangesGPR.prev = nullptr; + (*root)->link_allSegmentRanges.prev = subrange; + subrange->link_allSegmentRanges.prev = nullptr; *root = subrange; } -void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessSubrange_t* subrange) +void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) { - IMLRegID regId = subrange->range->virtualRegister; - raLivenessSubrange_t* nextRange = subrange->link_sameVirtualRegisterGPR.next; - raLivenessSubrange_t* prevRange = subrange->link_sameVirtualRegisterGPR.prev; - raLivenessSubrange_t* newBase = prevRange ? 
prevRange : nextRange; + IMLRegID regId = subrange->GetVirtualRegister(); + raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next; + raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev; + raLivenessRange* newBase = prevRange ? prevRange : nextRange; if (prevRange) - prevRange->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next; + prevRange->link_sameVirtualRegister.next = subrange->link_sameVirtualRegister.next; if (nextRange) - nextRange->link_sameVirtualRegisterGPR.prev = subrange->link_sameVirtualRegisterGPR.prev; + nextRange->link_sameVirtualRegister.prev = subrange->link_sameVirtualRegister.prev; if (!prevRange) { @@ -55,81 +120,78 @@ void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_maplink_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1; - subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1; + subrange->link_sameVirtualRegister.prev = (raLivenessRange*)1; + subrange->link_sameVirtualRegister.next = (raLivenessRange*)1; #endif } -void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) +void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange) { - raLivenessSubrange_t* tempPrev = subrange->link_segmentSubrangesGPR.prev; - if (subrange->link_segmentSubrangesGPR.prev) - subrange->link_segmentSubrangesGPR.prev->link_segmentSubrangesGPR.next = subrange->link_segmentSubrangesGPR.next; + raLivenessRange* tempPrev = subrange->link_allSegmentRanges.prev; + if (subrange->link_allSegmentRanges.prev) + subrange->link_allSegmentRanges.prev->link_allSegmentRanges.next = subrange->link_allSegmentRanges.next; else - (*root) = subrange->link_segmentSubrangesGPR.next; - if (subrange->link_segmentSubrangesGPR.next) - subrange->link_segmentSubrangesGPR.next->link_segmentSubrangesGPR.prev = tempPrev; + (*root) = subrange->link_allSegmentRanges.next; + if (subrange->link_allSegmentRanges.next) + subrange->link_allSegmentRanges.next->link_allSegmentRanges.prev = tempPrev; #ifdef CEMU_DEBUG_ASSERT - subrange->link_segmentSubrangesGPR.prev = (raLivenessSubrange_t*)1; - subrange->link_segmentSubrangesGPR.next = (raLivenessSubrange_t*)1; + subrange->link_allSegmentRanges.prev = (raLivenessRange*)1; + subrange->link_allSegmentRanges.next = (raLivenessRange*)1; #endif } -MemoryPoolPermanentObjects memPool_livenessRange(4096); -MemoryPoolPermanentObjects memPool_livenessSubrange(4096); +MemoryPoolPermanentObjects memPool_livenessSubrange(4096); -raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name) +raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex) { - raLivenessRange_t* livenessRange = memPool_livenessRange.acquireObj(); - livenessRange->list_subranges.resize(0); - livenessRange->virtualRegister = virtualRegister; - livenessRange->name = name; - livenessRange->physicalRegister = -1; - ppcImlGenContext->raInfo.list_ranges.push_back(livenessRange); - return livenessRange; -} - -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex) -{ - raLivenessSubrange_t* livenessSubrange = memPool_livenessSubrange.acquireObj(); - livenessSubrange->list_locations.resize(0); - livenessSubrange->range = range; - livenessSubrange->imlSegment = 
imlSegment; - PPCRecompilerIml_setSegmentPoint(&livenessSubrange->start, imlSegment, startIndex); - PPCRecompilerIml_setSegmentPoint(&livenessSubrange->end, imlSegment, endIndex); + raLivenessRange* range = memPool_livenessSubrange.acquireObj(); + range->previousRanges.clear(); + range->list_locations.resize(0); + range->imlSegment = imlSegment; + PPCRecompilerIml_setSegmentPoint(&range->start, imlSegment, startIndex); + PPCRecompilerIml_setSegmentPoint(&range->end, imlSegment, endIndex); + // register mapping + range->virtualRegister = virtualRegister; + range->name = name; + range->physicalRegister = -1; // default values - livenessSubrange->hasStore = false; - livenessSubrange->hasStoreDelayed = false; - livenessSubrange->lastIterationIndex = 0; - livenessSubrange->subrangeBranchNotTaken = nullptr; - livenessSubrange->subrangeBranchTaken = nullptr; - livenessSubrange->_noLoad = false; - // add to range - range->list_subranges.push_back(livenessSubrange); - // add to segment - PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, livenessSubrange); - PPCRecRARange_addLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, livenessSubrange); - return livenessSubrange; + range->hasStore = false; + range->hasStoreDelayed = false; + range->lastIterationIndex = 0; + range->subrangeBranchNotTaken = nullptr; + range->subrangeBranchTaken = nullptr; + range->_noLoad = false; + // add to segment linked lists + PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range); + PPCRecRARange_addLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range); + return range; } -void _unlinkSubrange(raLivenessSubrange_t* subrange) +void _unlinkSubrange(raLivenessRange* subrange) { IMLSegment* imlSegment = subrange->imlSegment; - PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, subrange); - PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange); + PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, subrange); + PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, subrange); } -void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) +void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { _unlinkSubrange(subrange); - subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); + //subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); subrange->list_locations.clear(); + // unlink reverse references + if(subrange->subrangeBranchTaken) + subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange)); + if(subrange->subrangeBranchNotTaken) + subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange)); + PPCRecompilerIml_removeSegmentPoint(&subrange->start); PPCRecompilerIml_removeSegmentPoint(&subrange->end); memPool_livenessSubrange.releaseObj(subrange); } -void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) +// leaves range and linked 
ranges in invalid state. Only use at final clean up when no range is going to be accessed anymore +void _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { _unlinkSubrange(subrange); PPCRecompilerIml_removeSegmentPoint(&subrange->start); @@ -137,49 +199,30 @@ void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenCont memPool_livenessSubrange.releaseObj(subrange); } -void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) -{ - for (auto& subrange : range->list_subranges) - { - _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange); - } - ppcImlGenContext->raInfo.list_ranges.erase(std::find(ppcImlGenContext->raInfo.list_ranges.begin(), ppcImlGenContext->raInfo.list_ranges.end(), range)); - memPool_livenessRange.releaseObj(range); -} - -void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) +void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { - for (auto& subrange : range->list_subranges) + auto clusterRanges = subrange->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) { - _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange); + _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, subrange); } - memPool_livenessRange.releaseObj(range); } void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) { - for(auto& range : ppcImlGenContext->raInfo.list_ranges) + for(auto& seg : ppcImlGenContext->segmentList2) { - PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext, range); - } - ppcImlGenContext->raInfo.list_ranges.clear(); -} - -void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange) -{ - cemu_assert_debug(range != absorbedRange); - cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister); - // move all subranges from absorbedRange to range - for (auto& subrange : absorbedRange->list_subranges) - { - range->list_subranges.push_back(subrange); - subrange->range = range; + raLivenessRange* cur; + while(cur = seg->raInfo.linkedList_allSubranges) + { + _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, cur); + } + seg->raInfo.linkedList_allSubranges = nullptr; + seg->raInfo.linkedList_perVirtualRegister.clear(); } - absorbedRange->list_subranges.clear(); - PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange); } -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange) +void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange) { #ifdef CEMU_DEBUG_ASSERT PPCRecRA_debugValidateSubrange(subrange); @@ -193,6 +236,12 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSub if (subrange == absorbedSubrange) assert_dbg(); #endif + + // update references + if(absorbedSubrange->subrangeBranchTaken) + *std::find(absorbedSubrange->subrangeBranchTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange; + if(absorbedSubrange->subrangeBranchNotTaken) + *std::find(absorbedSubrange->subrangeBranchNotTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange; subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; subrange->subrangeBranchNotTaken = 
absorbedSubrange->subrangeBranchNotTaken; @@ -210,29 +259,27 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSub PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange); } -// remove all inter-segment connections from the range and split it into local ranges (also removes empty ranges) -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) +// remove all inter-segment connections from the range cluster and split it into local ranges (also removes empty ranges) +void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange) { - if (range->list_subranges.size() == 1) - assert_dbg(); - for (auto& subrange : range->list_subranges) + auto clusterRanges = originRange->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) { if (subrange->list_locations.empty()) continue; - raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name); - raLivenessSubrange_t* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, newRange, subrange->imlSegment, subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1); + raLivenessRange* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1); // copy locations for (auto& location : subrange->list_locations) { newSubrange->list_locations.push_back(location); } } - // remove original range - PPCRecRA_deleteRange(ppcImlGenContext, range); + // remove subranges + PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange); } #ifdef CEMU_DEBUG_ASSERT -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) +void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange) { // validate subrange if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken) @@ -252,7 +299,7 @@ void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {} // The return value is the tail subrange // If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations // Ranges that begin at RA_INTER_RANGE_START are allowed and can be split -raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole) +raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole) { // validation #ifdef CEMU_DEBUG_ASSERT @@ -266,8 +313,7 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC assert_dbg(); #endif // create tail - raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name); - raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index); + raLivenessRange* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), splitIndex, subrange->end.index); // copy locations for (auto& location : subrange->list_locations) { @@ -312,7 +358,7 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* 
ppcImlGenC return tailSubrange; } -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite) +void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite) { if (subrange->list_locations.empty()) { @@ -339,13 +385,12 @@ sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment) return v*v; // 25, 100, 225, 400 } -// calculate cost of entire range -// ignores data flow and does not detect avoidable reads/stores -sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range) +// calculate cost of entire range cluster +sint32 PPCRecRARange_estimateTotalCost(std::span ranges) { sint32 cost = 0; - // todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code). + // todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code). // currently we calculate the cost based on the most expensive entry/exit point @@ -354,7 +399,7 @@ sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range) sint32 readCount = 0; sint32 writeCount = 0; - for (auto& subrange : range->list_subranges) + for (auto& subrange : ranges) { if (subrange->start.index != RA_INTER_RANGE_START) { @@ -375,10 +420,11 @@ sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range) } // calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it -sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range) +sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange) { - sint32 cost = -PPCRecRARange_estimateCost(range); - for (auto& subrange : range->list_subranges) + auto ranges = subrange->GetAllSubrangesInCluster(); + sint32 cost = -PPCRecRARange_estimateTotalCost(ranges); + for (auto& subrange : ranges) { if (subrange->list_locations.empty()) continue; @@ -387,7 +433,7 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* return cost; } -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex) +sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex) { // validation #ifdef CEMU_DEBUG_ASSERT diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index 28fbe9063..31deaab37 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -1,26 +1,77 @@ #pragma once -raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name); -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex); -void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange); -void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); +struct raLivenessLocation_t +{ + sint32 index; + bool isRead; + bool isWrite; + + raLivenessLocation_t() = default; + + raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) + : index(index), isRead(isRead), isWrite(isWrite) {}; +}; + +struct raLivenessSubrangeLink +{ + 
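+	// intrusive doubly-linked list node; raLivenessRange embeds one of these per list it participates in (the per-virtual-register and per-segment chains, see link_sameVirtualRegister / link_allSegmentRanges below)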
struct raLivenessRange* prev; + struct raLivenessRange* next; +}; + +struct raLivenessRange +{ + IMLSegment* imlSegment; + IMLSegmentPoint start; + IMLSegmentPoint end; + // dirty state tracking + bool _noLoad; + bool hasStore; + bool hasStoreDelayed; + // next + raLivenessRange* subrangeBranchTaken; + raLivenessRange* subrangeBranchNotTaken; + // reverse counterpart of BranchTaken/BranchNotTaken + boost::container::small_vector previousRanges; + // processing + uint32 lastIterationIndex; + // instruction locations + std::vector list_locations; + // linked list (subranges with same GPR virtual register) + raLivenessSubrangeLink link_sameVirtualRegister; + // linked list (all subranges for this segment) + raLivenessSubrangeLink link_allSegmentRanges; + // register mapping (constant) + IMLRegID virtualRegister; + IMLName name; + // register allocator result + sint32 physicalRegister; + + boost::container::small_vector GetAllSubrangesInCluster(); + + IMLRegID GetVirtualRegister() const; + sint32 GetPhysicalRegister() const; + IMLName GetName() const; + void SetPhysicalRegister(sint32 physicalRegister); + void SetPhysicalRegisterForCluster(sint32 physicalRegister); +}; + +raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex); +void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange); void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange); -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); +void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange); -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange); +void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange); -raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false); +raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole = false); -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite); -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange); +void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite); +void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); // cost estimation sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment); -sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range); -sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range); -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex); +sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange); +sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex); // special values to mark the index of ranges that reach across the segment border #define RA_INTER_RANGE_START (-1) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index f0420b011..0589d6603 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,6 +1,8 @@ #pragma once #include "IMLInstruction.h" +#include + struct IMLSegmentPoint { sint32 index; @@ -9,63 +11,14 @@ struct IMLSegmentPoint IMLSegmentPoint* prev; }; -struct raLivenessLocation_t -{ - sint32 index; - bool isRead; - bool isWrite; - - raLivenessLocation_t() = default; - - raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) - : index(index), isRead(isRead), isWrite(isWrite) {}; -}; - -struct raLivenessSubrangeLink_t -{ - struct raLivenessSubrange_t* prev; - struct raLivenessSubrange_t* next; -}; - -struct raLivenessSubrange_t -{ - struct raLivenessRange_t* range; - IMLSegment* imlSegment; - IMLSegmentPoint start; - IMLSegmentPoint end; - // dirty state tracking - bool _noLoad; - bool hasStore; - bool hasStoreDelayed; - // next - raLivenessSubrange_t* subrangeBranchTaken; - raLivenessSubrange_t* subrangeBranchNotTaken; - // processing - uint32 lastIterationIndex; - // instruction locations - std::vector list_locations; - // linked list (subranges with same GPR virtual register) - raLivenessSubrangeLink_t link_sameVirtualRegisterGPR; - // linked list (all subranges for this segment) - raLivenessSubrangeLink_t link_segmentSubrangesGPR; -}; - -struct raLivenessRange_t -{ - IMLRegID virtualRegister; - sint32 physicalRegister; - IMLName name; - std::vector list_subranges; -}; - struct PPCSegmentRegisterAllocatorInfo_t { // used during loop detection bool isPartOfProcessedLoop{}; sint32 lastIterationIndex{}; // linked lists - raLivenessSubrange_t* linkedList_allSubranges{}; - std::unordered_map linkedList_perVirtualGPR2; + struct raLivenessRange* linkedList_allSubranges{}; + std::unordered_map linkedList_perVirtualRegister; }; struct IMLSegment diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 25a2c1635..846426f58 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -16,6 +16,7 @@ #include "IML/IML.h" #include "IML/IMLRegisterAllocator.h" #include "BackendX64/BackendX64.h" +#include "util/highresolutiontimer/HighResolutionTimer.h" struct PPCInvalidationRange { @@ -157,6 +158,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; + BenchmarkTimer bt; + bt.Start(); + // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; ppcImlGenContext.debug_entryPPCAddress = range.startAddress; @@ -240,9 +244,18 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } + bt.Stop(); + //cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); - cemuLog_logDebug(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code); + uint32 codeHash = 0; + for (uint32 i = 0; i < ppcRecFunc->x86Size; i++) + { + codeHash = _rotr(codeHash, 3); + codeHash += ((uint8*)ppcRecFunc->x86Code)[i]; + } + + //cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} 
CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash); return ppcRecFunc; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 94b3fcd97..706855d4e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -54,11 +54,6 @@ struct ppcImlGenContext_t std::vector segmentList2; // code generation control bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode - // register allocator info - struct - { - std::vector list_ranges; - }raInfo; // analysis info struct { From dcbaa5a43a01990a46c2e6e8b4c7189bbeb76b34 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:06:12 +0200 Subject: [PATCH 44/64] PPCRec: Add RA support for instructions with register constraints Also make interval tracking more fine grained and differentiate between input and output edges of each instruction --- .../Recompiler/BackendX64/BackendX64.cpp | 153 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 73 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 1 + .../Recompiler/IML/IMLRegisterAllocator.cpp | 1691 ++++++++++++----- .../Recompiler/IML/IMLRegisterAllocator.h | 20 +- .../IML/IMLRegisterAllocatorRanges.cpp | 398 +++- .../IML/IMLRegisterAllocatorRanges.h | 301 ++- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 111 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 221 ++- .../Recompiler/PPCRecompilerImlGen.cpp | 81 +- 10 files changed, 2311 insertions(+), 739 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 67585ed2a..1d38eb3b2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -579,31 +579,23 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, return true; } -bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut)); auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA); auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue); auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue); - // make sure non of the regs are in EAX - if (regEA == X86_REG_EAX || - regBoolOut == X86_REG_EAX || - regVal == X86_REG_EAX || - regCmp == X86_REG_EAX) - { - printf("x86: atomic_cmp_store cannot emit due to EAX already being in use\n"); - return false; - } + cemu_assert_debug(regBoolOut == X86_REG_EAX); + cemu_assert_debug(regEA != X86_REG_EAX); + cemu_assert_debug(regVal != X86_REG_EAX); + cemu_assert_debug(regCmp != X86_REG_EAX); - x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp); - x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regBoolOut), _reg32_from_reg8(regBoolOut)); // zero bytes unaffected by SETcc x64GenContext->emitter->LockPrefix(); 
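+	// the locked CMPXCHG below compares the word at [REG_RESV_MEMBASE + regEA] against EAX (loaded with the compare value above) and writes regVal on a match; ZF then signals success to the following SETcc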
x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal); x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut); - x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); - return true; + x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here } bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) @@ -908,78 +900,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) { - // x86's shift and rotate instruction have the shift amount hardwired to the CL register - // since our register allocator doesn't support instruction based fixed phys registers yet - // we'll instead have to temporarily shuffle registers around - - // we use BMI2's shift instructions until the RA can assign fixed registers - if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + if(g_CPUFeatures.x86.bmi2) { - x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) - { - x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); - } - else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + else { - x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + cemu_assert_debug(rRegResult != rRegOperand2); + cemu_assert_debug(rRegResult != X86_REG_RCX); + cemu_assert_debug(rRegOperand2 == X86_REG_RCX); + if(rRegOperand1 != rRegResult) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + x64GenContext->emitter->SAR_d_CL(rRegResult); + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + x64GenContext->emitter->SHR_d_CL(rRegResult); + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + x64GenContext->emitter->SHL_d_CL(rRegResult); } - - //auto rResult = _reg32(rRegResult); - //auto rOp2 = _reg8_from_reg32(_reg32(rRegOperand2)); - - //if (rRegResult == rRegOperand2) - //{ - // if (rRegResult != rRegOperand1) - // DEBUG_BREAK; // cannot handle yet (we use rRegResult as a temporary reg, but its not possible if it is shared with op2) - //} - - //if(rRegOperand1 != rRegResult) - // x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - - //cemu_assert_debug(rRegOperand1 != X86_REG_ECX); - - //if (rRegOperand2 == X86_REG_ECX) - //{ - // if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) - // x64GenContext->emitter->SAR_d_CL(rResult); - // else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) - // x64GenContext->emitter->SHR_d_CL(rResult); - // else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) - // x64GenContext->emitter->SHL_d_CL(rResult); - // else - // 
cemu_assert_unimplemented(); - //} - //else - //{ - // auto rRegResultOrg = rRegResult; - // if (rRegResult == X86_REG_ECX) - // { - // x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegResult); - // rRegResult = REG_RESV_TEMP; - // rResult = _reg32(rRegResult); - // } - // - // x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); - // - // if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) - // x64GenContext->emitter->SAR_d_CL(rResult); - // else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) - // x64GenContext->emitter->SHR_d_CL(rResult); - // else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) - // x64GenContext->emitter->SHL_d_CL(rResult); - // else - // cemu_assert_unimplemented(); - - // x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); - - // // move result back if it was in ECX - // if (rRegResultOrg == X86_REG_ECX) - // { - // x64Gen_mov_reg64_reg64(x64GenContext, rRegResultOrg, REG_RESV_TEMP); - // } - //} } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { @@ -1093,9 +1036,19 @@ bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction auto regA = _reg32(imlInstruction->op_compare.regA); auto regB = _reg32(imlInstruction->op_compare.regB); X86Cond cond = _x86Cond(imlInstruction->op_compare.cond); - x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc - x64GenContext->emitter->CMP_dd(regA, regB); - x64GenContext->emitter->SETcc_b(cond, regR); + bool keepR = regR == regA || regR == regB; + if(!keepR) + { + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc + x64GenContext->emitter->CMP_dd(regA, regB); + x64GenContext->emitter->SETcc_b(cond, regR); + } + else + { + x64GenContext->emitter->CMP_dd(regA, regB); + x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0); + x64GenContext->emitter->SETcc_b(cond, regR); + } return true; } @@ -1105,9 +1058,19 @@ bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunc auto regA = _reg32(imlInstruction->op_compare_s32.regA); sint32 imm = imlInstruction->op_compare_s32.immS32; X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond); - x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc - x64GenContext->emitter->CMP_di32(regA, imm); - x64GenContext->emitter->SETcc_b(cond, regR); + bool keepR = regR == regA; + if(!keepR) + { + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc + x64GenContext->emitter->CMP_di32(regA, imm); + x64GenContext->emitter->SETcc_b(cond, regR); + } + else + { + x64GenContext->emitter->CMP_di32(regA, imm); + x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0); + x64GenContext->emitter->SETcc_b(cond, regR); + } return true; } @@ -1202,7 +1165,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction { if( regA != regR ) x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); - if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) @@ -1224,19 +1186,25 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFu auto regA = 
_reg32(imlInstruction->op_r_r_s32_carry.regA); sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32; auto regCarry = _reg32(imlInstruction->op_r_r_s32_carry.regCarry); - cemu_assert_debug(regCarry != regR && regCarry != regA); + cemu_assert_debug(regCarry != regR); // we dont allow two different outputs sharing the same register + + bool delayCarryInit = regCarry == regA; switch (imlInstruction->operation) { case PPCREC_IML_OP_ADD: - x64GenContext->emitter->XOR_dd(regCarry, regCarry); + if(!delayCarryInit) + x64GenContext->emitter->XOR_dd(regCarry, regCarry); if (regR != regA) x64GenContext->emitter->MOV_dd(regR, regA); x64GenContext->emitter->ADD_di32(regR, immS32); + if(delayCarryInit) + x64GenContext->emitter->MOV_di32(regCarry, 0); x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); break; case PPCREC_IML_OP_ADD_WITH_CARRY: // assumes that carry is already correctly initialized as 0 or 1 + cemu_assert_debug(regCarry != regR); if (regR != regA) x64GenContext->emitter->MOV_dd(regR, regA); x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag @@ -1600,8 +1568,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo } else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - if (!PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction)) - codeGenerationFailed = true; + PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); } else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index cca8b61e4..4850ed816 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -91,25 +91,37 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml sint32 index = currentLineText.getLen(); while (index < 70) { - debug_printf(" "); + currentLineText.add(" "); index++; } raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - if (offset == subrangeItr->start.index) + if (subrangeItr->interval2.start.GetInstructionIndexEx() == offset) { - debug_printf("|%-2d", subrangeItr->GetVirtualRegister()); + if(subrangeItr->interval2.start.IsInstructionIndex() && !subrangeItr->interval2.start.IsOnInputEdge()) + currentLineText.add("."); + else + currentLineText.add("|"); + + currentLineText.addFmt("{:<4}", subrangeItr->GetVirtualRegister()); + } + else if (subrangeItr->interval2.end.GetInstructionIndexEx() == offset) + { + if(subrangeItr->interval2.end.IsInstructionIndex() && !subrangeItr->interval2.end.IsOnOutputEdge()) + currentLineText.add("* "); + else + currentLineText.add("| "); } - else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index) + else if (subrangeItr->interval2.ContainsInstructionIndexEx(offset)) { - debug_printf("| "); + currentLineText.add("| "); } else { - debug_printf(" "); + currentLineText.add(" "); } - index += 3; + index += 5; // next subrangeItr = subrangeItr->link_allSegmentRanges.next; } @@ -446,7 +458,7 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { - StringBuf strOutput(1024); + StringBuf strOutput(4096); strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth 
{}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); if (imlSegment->isEnterable) @@ -457,13 +469,13 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool { strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg)); } - debug_printf("%s\n", strOutput.c_str()); + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); if (printLivenessRangeInfo) { strOutput.reset(); IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); - debug_printf("%s\n", strOutput.c_str()); + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); } //debug_printf("\n"); strOutput.reset(); @@ -475,53 +487,56 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool // don't log NOP instructions if (inst.type == PPCREC_IML_TYPE_NO_OP) continue; - //strOutput.addFmt("{:02x} ", i); - debug_printf(fmt::format("{:02x} ", i).c_str()); + strOutput.reset(); + strOutput.addFmt("{:02x} ", i); + //cemuLog_log(LogType::Force, "{:02x} ", i); disassemblyLine.clear(); IMLDebug_DisassembleInstruction(inst, disassemblyLine); - debug_printf("%s", disassemblyLine.c_str()); + strOutput.add(disassemblyLine); if (printLivenessRangeInfo) { IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i); } - debug_printf("\n"); + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); } // all ranges if (printLivenessRangeInfo) { - debug_printf("Ranges-VirtReg "); + strOutput.reset(); + strOutput.add("Ranges-VirtReg "); raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - debug_printf("v%-2d", subrangeItr->GetVirtualRegister()); + strOutput.addFmt("v{:<4}", (uint32)subrangeItr->GetVirtualRegister()); subrangeItr = subrangeItr->link_allSegmentRanges.next; } - debug_printf("\n"); - debug_printf("Ranges-PhysReg "); + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); + strOutput.reset(); + strOutput.add("Ranges-PhysReg "); subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - debug_printf("p%-2d", subrangeItr->GetPhysicalRegister()); + strOutput.addFmt("p{:<4}", subrangeItr->GetPhysicalRegister()); subrangeItr = subrangeItr->link_allSegmentRanges.next; } - debug_printf("\n"); + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); } // branch info - debug_printf("Links from: "); + strOutput.reset(); + strOutput.add("Links from: "); for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++) { if (i) - debug_printf(", "); - debug_printf("%s", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str()); + strOutput.add(", "); + strOutput.addFmt("{}", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str()); } - debug_printf("\n"); + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); if (imlSegment->nextSegmentBranchNotTaken) - debug_printf("BranchNotTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str()); + cemuLog_log(LogType::Force, "BranchNotTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str()); if (imlSegment->nextSegmentBranchTaken) - debug_printf("BranchTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str()); + cemuLog_log(LogType::Force, "BranchTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str()); if (imlSegment->nextSegmentIsUncertain) - debug_printf("Dynamic target\n"); - debug_printf("\n"); + cemuLog_log(LogType::Force, "Dynamic 
target"); } void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo) @@ -529,6 +544,6 @@ void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRange for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo); - debug_printf("\n"); + cemuLog_log(LogType::Force, ""); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 78863931c..7594bc9f6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -1,6 +1,7 @@ #pragma once using IMLRegID = uint16; // 16 bit ID +using IMLPhysReg = sint32; // arbitrary value that is up to the architecture backend, usually this will be the register index. A value of -1 is reserved and means not assigned // format of IMLReg: // 0-15 (16 bit) IMLRegID diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index a59b88bd2..9b9ce15fa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -7,6 +7,7 @@ #include "../BackendX64/BackendX64.h" +#include #include struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment @@ -50,6 +51,45 @@ struct IMLRegisterAllocatorContext }; +struct IMLFixedRegisters +{ + struct Entry + { + Entry(IMLReg reg, IMLPhysRegisterSet physRegSet) : reg(reg), physRegSet(physRegSet) {} + + IMLReg reg; + IMLPhysRegisterSet physRegSet; + }; + boost::container::static_vector listInput; // fixed registers for input edge + boost::container::static_vector listOutput; // fixed registers for output edge +}; + +static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) +{ + fixedRegs.listInput.clear(); + fixedRegs.listOutput.clear(); + + // x86 specific logic is hardcoded for now + if(instruction->type == PPCREC_IML_TYPE_R_R_R) + { + if(instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + { + // todo: We can skip this if g_CPUFeatures.x86.bmi2 is set, but for now we just assume it's not so we can properly test increased register pressure + IMLPhysRegisterSet ps; + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_ECX); + fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps); + } + } + else if(instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); + fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); + } + // todo - for volatile registers during call, we can emit a bunch of ranges that cover the output edge of the CALL instruction and use a special vGPR to indicate its not an actually mapped register +} + + uint32 PPCRecRA_getNextIterationIndex() { static uint32 recRACurrentIterationIndex = 0; @@ -119,20 +159,95 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml #define SUBRANGE_LIST_SIZE (128) -sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessRange* subrange, sint32 startIndex) +sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition) { + sint32 startInstructionIndex; + 
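+	// distances are measured in instruction edges: each instruction index maps to two positions (raw = index*2 for the read/input edge, index*2+1 for the write/output edge)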
if(startPosition.ConnectsToPreviousSegment()) + startInstructionIndex = 0; + else + startInstructionIndex = startPosition.GetInstructionIndex(); for (sint32 i = 0; i < subrange->list_locations.size(); i++) { - if (subrange->list_locations.data()[i].index >= startIndex) - return subrange->list_locations.data()[i].index - startIndex; + if (subrange->list_locations[i].index >= startInstructionIndex) + { + sint32 preciseIndex = subrange->list_locations[i].index * 2; + cemu_assert_debug(subrange->list_locations[i].isRead || subrange->list_locations[i].isWrite); // locations must have any access + // check read edge + if(subrange->list_locations[i].isRead) + { + if(preciseIndex >= startPosition.GetRaw()) + return preciseIndex - startPosition.GetRaw(); + } + // check write edge + if(subrange->list_locations[i].isWrite) + { + preciseIndex++; + if(preciseIndex >= startPosition.GetRaw()) + return preciseIndex - startPosition.GetRaw(); + } + } + } + cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000); + return 10001*2; +} + +// returns -1 if there is no fixed register requirement on or after startPosition +sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess) +{ + hasFixedAccess = false; + cemu_assert_debug(startPosition.IsInstructionIndex()); + for(auto& fixedReqEntry : range->list_fixedRegRequirements) + { + if(fixedReqEntry.pos < startPosition) + continue; + if(fixedReqEntry.allowedReg.IsAvailable(physRegister)) + { + hasFixedAccess = true; + return fixedReqEntry.pos.GetRaw() - startPosition.GetRaw(); + } + } + cemu_assert_debug(range->interval2.end.IsInstructionIndex()); + return range->interval2.end.GetRaw() - startPosition.GetRaw(); +} + +sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister) +{ + cemu_assert_debug(startPosition.IsInstructionIndex()); + raInstructionEdge lastPos2; + lastPos2.Set(imlSegment->imlList.size(), false); + + raInstructionEdge endPos; + endPos = startPosition + maxDistance; + if(endPos > lastPos2) + endPos = lastPos2; + IMLFixedRegisters fixedRegs; + if(startPosition.IsOnOutputEdge()) + GetInstructionFixedRegisters(imlSegment->imlList.data()+startPosition.GetInstructionIndex(), fixedRegs); + for(raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos) + { + if(currentPos.IsOnInputEdge()) + { + GetInstructionFixedRegisters(imlSegment->imlList.data()+currentPos.GetInstructionIndex(), fixedRegs); + } + auto& fixedRegAccess = currentPos.IsOnInputEdge() ? 
fixedRegs.listInput : fixedRegs.listOutput; + for(auto& fixedRegLoc : fixedRegAccess) + { + if(fixedRegLoc.reg.GetRegID() != ourRegId) + { + cemu_assert_debug(fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider + if(fixedRegLoc.physRegSet.IsAvailable(physRegister)) + return currentPos.GetRaw() - startPosition.GetRaw(); + } + } } - return INT_MAX; + return endPos.GetRaw() - startPosition.GetRaw(); } -// count how many instructions there are until physRegister is used by any subrange (returns 0 if register is in use at startIndex, and INT_MAX if not used for the remainder of the segment) -sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, sint32 startIndex, sint32 physRegister) +// count how many instructions there are until physRegister is used by any subrange or reserved for any fixed register requirement (returns 0 if register is in use at startIndex) +sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister) { - sint32 minDistance = INT_MAX; + cemu_assert_debug(startPosition.IsInstructionIndex()); + sint32 minDistance = (sint32)imlSegment->imlList.size()*2 - startPosition.GetRaw(); // next raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) @@ -142,12 +257,16 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } - if (startIndex >= subrangeItr->start.index && startIndex < subrangeItr->end.index) + if(subrangeItr->interval2.ContainsEdge(startPosition)) return 0; - if (subrangeItr->start.index >= startIndex) + if (subrangeItr->interval2.end < startPosition) { - minDistance = std::min(minDistance, (subrangeItr->start.index - startIndex)); + subrangeItr = subrangeItr->link_allSegmentRanges.next; + continue; } + cemu_assert_debug(startPosition <= subrangeItr->interval2.start); + sint32 currentDist = subrangeItr->interval2.start.GetRaw() - startPosition.GetRaw(); + minDistance = std::min(minDistance, currentDist); subrangeItr = subrangeItr->link_allSegmentRanges.next; } return minDistance; @@ -155,20 +274,6 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe struct IMLRALivenessTimeline { -// IMLRALivenessTimeline(raLivenessSubrange_t* subrangeChain) -// { -//#ifdef CEMU_DEBUG_ASSERT -// raLivenessSubrange_t* it = subrangeChain; -// raLivenessSubrange_t* prevIt = it; -// while (it) -// { -// cemu_assert_debug(prevIt->start.index <= it->start.index); -// prevIt = it; -// it = it->link_segmentSubrangesGPR.next; -// } -//#endif -// } - IMLRALivenessTimeline() { } @@ -182,12 +287,13 @@ struct IMLRALivenessTimeline // remove all ranges from activeRanges with end <= instructionIndex void ExpireRanges(sint32 instructionIndex) { + __debugbreak(); // maybe replace calls with raInstructionEdge variant? 
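+		// the __debugbreak above flags any callers that still use this instruction-index overload; the raInstructionEdge-based ExpireRanges below is presumably the intended replacement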
expiredRanges.clear(); size_t count = activeRanges.size(); for (size_t f = 0; f < count; f++) { raLivenessRange* liverange = activeRanges[f]; - if (liverange->end.index <= instructionIndex) + if (liverange->interval2.end.GetInstructionIndex() < instructionIndex) // <= to < since end is now inclusive { #ifdef CEMU_DEBUG_ASSERT if (instructionIndex != RA_INTER_RANGE_END && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken)) @@ -204,28 +310,63 @@ struct IMLRALivenessTimeline activeRanges.resize(count); } + void ExpireRanges(raInstructionEdge expireUpTo) + { + expiredRanges.clear(); + size_t count = activeRanges.size(); + for (size_t f = 0; f < count; f++) + { + raLivenessRange* liverange = activeRanges[f]; + if (liverange->interval2.end < expireUpTo) // this was <= but since end is not inclusive we need to use < + { +#ifdef CEMU_DEBUG_ASSERT + if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken)) + assert_dbg(); // infinite subranges should not expire +#endif + expiredRanges.emplace_back(liverange); + // remove entry + activeRanges[f] = activeRanges[count-1]; + f--; + count--; + } + } + if(count != activeRanges.size()) + activeRanges.resize(count); + } + std::span GetExpiredRanges() { return { expiredRanges.data(), expiredRanges.size() }; } + std::span GetActiveRanges() + { + return { activeRanges.data(), activeRanges.size() }; + } + + raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId) + { + for(auto& it : activeRanges) + if(it->virtualRegister == regId) + return it; + return nullptr; + } + + raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg) + { + cemu_assert_debug(physReg >= 0); + for(auto& it : activeRanges) + if(it->physicalRegister == physReg) + return it; + return nullptr; + } + boost::container::small_vector activeRanges; private: boost::container::small_vector expiredRanges; }; -bool IsRangeOverlapping(raLivenessRange* rangeA, raLivenessRange* rangeB) -{ - if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index) - return true; - if ((rangeA->start.index == RA_INTER_RANGE_START && rangeA->start.index == rangeB->start.index)) - return true; - if (rangeA->end.index == RA_INTER_RANGE_END && rangeA->end.index == rangeB->end.index) - return true; - return false; -} - // mark occupied registers by any overlapping range as unavailable in physRegSet void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet) { @@ -242,7 +383,7 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } - if(IsRangeOverlapping(subrange, subrangeItr)) + if(subrange->interval2.IsOverlapping(subrangeItr->interval2)) { if (subrangeItr->GetPhysicalRegister() >= 0) physRegSet.SetReserved(subrangeItr->GetPhysicalRegister()); @@ -253,7 +394,7 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP } } -bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->start.index < rhs->start.index; } +bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->interval2.start < rhs->interval2.start; } void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) { @@ -291,13 +432,14 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) #ifdef CEMU_DEBUG_ASSERT sint32 count2 = 0; subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - sint32 
currentStartIndex = RA_INTER_RANGE_START; + raInstructionEdge currentStartPosition; + currentStartPosition.SetRaw(RA_INTER_RANGE_START); while (subrangeItr) { count2++; - if (subrangeItr->start.index < currentStartIndex) + if (subrangeItr->interval2.start < currentStartPosition) assert_dbg(); - currentStartIndex = subrangeItr->start.index; + currentStartPosition = subrangeItr->interval2.start; // next subrangeItr = subrangeItr->link_allSegmentRanges.next; } @@ -319,74 +461,626 @@ raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId) return it->second; } -raLivenessRange* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) +struct raFixedRegRequirementWithVGPR { - uint32 regId = regToSearch.GetRegID(); - raLivenessRange* subrangeItr = IMLRA_GetSubrange(imlSegment, regId); - while (subrangeItr) + raInstructionEdge pos; + IMLPhysRegisterSet allowedReg; + IMLRegID regId; +}; + +std::vector IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment) +{ + std::vector frrList; + + size_t index = 0; + IMLUsedRegisters gprTracking; + while (index < imlSegment->imlList.size()) { - if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex) - return subrangeItr; - subrangeItr = subrangeItr->link_sameVirtualRegister.next; + IMLFixedRegisters fixedRegs; + GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); + raInstructionEdge pos; + pos.Set(index, true); + for(auto& fixedRegAccess : fixedRegs.listInput) + { + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); + } + pos = pos + 1; + for(auto& fixedRegAccess : fixedRegs.listOutput) + { + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); + } + index++; } - return nullptr; + return frrList; } -void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessRange* subrange, sint32 instructionIndex) +boost::container::small_vector IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg) { - DEBUG_BREAK; + boost::container::small_vector rangeList; + for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + if(!currentRange->interval2.ContainsEdge(pos)) + continue; + IMLPhysRegisterSet allowedRegs; + if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + continue; + if(allowedRegs.IsAvailable(physReg)) + rangeList.emplace_back(currentRange); + } + return rangeList; } void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) - // algorithm goes as follows: - // 1) Iterate all instructions in the function from beginning to end and keep a list of active ranges for the currently iterated instruction - // 2) If we encounter an instruction with a fixed register requirement we: - // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction - // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. 
Basically creating a 1-instruction long subrange that we can assign the physical register. RA will then schedule register allocation around that and avoid moves - // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction - // Q: What if a specific fixed-register is used both for input and output and thus is destructive? A: Create temporary range - // Q: What if we have 3 different inputs that are all the same virtual register? A: Create temporary range - // Q: Assuming the above is implemented, do we even support overlapping two ranges of separate virtual regs on the same phys register? In theory the RA shouldn't care - - // experimental code - //for (size_t i = 0; i < imlSegment->imlList.size(); i++) - //{ - // IMLInstruction& inst = imlSegment->imlList[i]; - // if (inst.type == PPCREC_IML_TYPE_R_R_R) - // { - // if (inst.operation == PPCREC_IML_OP_LEFT_SHIFT) - // { - // // get the virtual reg which needs to be assigned a fixed register - // //IMLUsedRegisters usedReg; - // //inst.CheckRegisterUsage(&usedReg); - // IMLReg rB = inst.op_r_r_r.regB; - // // rB needs to use RCX/ECX - // raLivenessSubrange_t* subrange = _GetSubrangeByInstructionIndexAndVirtualReg(imlSegment, rB, i); - // cemu_assert_debug(subrange->range->physicalRegister < 0); // already has a phys reg assigned - // // make sure RCX/ECX is free - // // split before (if needed) and after instruction so that we get a new 1-instruction long range for which we can assign the physical register - // raLivenessSubrange_t* instructionRange = subrange->start.index < i ? PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrange, i, false) : subrange; - // raLivenessSubrange_t* tailRange = PPCRecRA_splitLocalSubrange(ppcImlGenContext, instructionRange, i+1, false); - - // } - // } - //} + // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border (we can later optimize this) + for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + continue; + if(currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) + { + PPCRecRA_explodeRange(ppcImlGenContext, currentRange); + // currentRange may be invalidated, therefore iterate from the beginning again (todo - can be optimized) + currentRange = imlSegment->raInfo.linkedList_allSubranges; + } + } + // second pass - look for ranges with conflicting fixed register requirements and split these too (locally) + for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + continue; + if(allowedRegs.HasAnyAvailable()) + continue; + cemu_assert_unimplemented(); + } + // third pass - assign fixed registers, split ranges if needed + std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment); + std::unordered_map lastVGPR; + for(size_t i=0; isecond != entry.regId; + else + vgprHasChanged = true; + lastVGPR[physReg] = entry.regId; + + if(!vgprHasChanged) + continue; + + boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); + cemu_assert_debug(!overlappingRanges.empty()); // there should always be at 
least one range that overlaps corresponding to the fixed register requirement + + for(auto& range : overlappingRanges) + { + if(range->interval2.start < entry.pos) + { + PPCRecRA_splitLocalSubrange2(ppcImlGenContext, range, entry.pos, true); + } + } + } + // finally iterate ranges and assign fixed registers + for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + { + cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); + continue; + } + cemu_assert_debug(allowedRegs.HasExactlyOneAvailable()); + currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg()); + } + // DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned +#ifdef CEMU_DEBUG_ASSERT + for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if(!currentRange->HasPhysicalRegister()) + continue; + for(raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next) + { + if(currentRange == currentRange2) + continue; + if(currentRange->interval2.IsOverlapping(currentRange2->interval2)) + { + cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister()); + } + } + } + for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + { + cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); + continue; + } + cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister())); + } +#endif +} + +// we should not split ranges on instructions with tied registers (i.e. 
where a register encoded as a single parameter is both input and output) +// otherwise the RA algorithm has to assign both ranges the same physical register (not supported yet) and the point of splitting to fit another range is nullified +void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos) +{ + // we ignore the instruction for now and just always make it a safe split position + cemu_assert_debug(pos.IsInstructionIndex()); + if(pos.IsOnOutputEdge()) + pos = pos - 1; +} + +// convenience wrapper for IMLRA_MakeSafeSplitPosition +void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge startPos, sint32& distance) +{ + cemu_assert_debug(startPos.IsInstructionIndex()); + cemu_assert_debug(distance >= 0); + raInstructionEdge endPos = startPos + distance; + IMLRA_MakeSafeSplitPosition(imlSegment, endPos); + if(endPos < startPos) + { + distance = 0; + return; + } + distance = endPos.GetRaw() - startPos.GetRaw(); +} + +void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx); + +class RASpillStrategy +{ +public: + virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0; + + sint32 GetCost() + { + return strategyCost; + } + +protected: + void ResetCost() + { + strategyCost = INT_MAX; + } + + sint32 strategyCost; +}; + +class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy +{ +public: + void Reset() + { + localRangeHoleCutting.distance = -1; + localRangeHoleCutting.largestHoleSubrange = nullptr; + ResetCost(); + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) + { + raInstructionEdge currentRangeStart = currentRange->interval2.start; + sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + cemu_assert_debug(localRangeHoleCutting.distance == -1); + cemu_assert_debug(strategyCost == INT_MAX); + if(!currentRangeStart.ConnectsToPreviousSegment()) + { + cemu_assert_debug(currentRangeStart.GetRaw() >= 0); + for (auto candidate : timeline.activeRanges) + { + if (candidate->interval2.ExtendsIntoNextSegment()) + continue; + // new checks (Oct 2024): + if(candidate == currentRange) + continue; + if(candidate->GetPhysicalRegister() < 0) + continue; + if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + continue; + + sint32 distance2 = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart); + IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2); + if (distance2 < 2) + continue; + cemu_assert_debug(currentRangeStart.IsInstructionIndex()); + distance2 = std::min(distance2, imlSegment->imlList.size()*2 - currentRangeStart.GetRaw()); // limit distance to end of segment + // calculate split cost of candidate + sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2); + // calculate additional split cost of currentRange if hole is not large enough + if (distance2 < requiredSize2) + { + cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2); + // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes) + cost += (requiredSize2 - distance2) / 10; + } + // compare cost with previous candidates + if (cost < strategyCost) + { + strategyCost = cost; + localRangeHoleCutting.distance = distance2; + localRangeHoleCutting.largestHoleSubrange = candidate; + } + } + } + } + + void Apply(ppcImlGenContext_t* ctx, 
IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + cemu_assert_debug(strategyCost != INT_MAX); + sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + raInstructionEdge currentRangeStart = currentRange->interval2.start; + + raInstructionEdge holeStartPosition = currentRangeStart; + raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance; + raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange; + + if(collisionRange->interval2.start < holeStartPosition) + { + collisionRange = PPCRecRA_splitLocalSubrange2(nullptr, collisionRange, holeStartPosition, true); + cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point + cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough + } + else + { + cemu_assert_unimplemented(); // we still need to trim? + } + // we may also have to cut the current range to fit partially into the hole + if (requiredSize2 > localRangeHoleCutting.distance) + { + raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true); + if(tailRange) + { + cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers + tailRange->UnsetPhysicalRegister(); + } + } + // verify that the hole is large enough + if(collisionRange) + { + cemu_assert_debug(!collisionRange->interval2.IsOverlapping(currentRange->interval2)); + } + } + +private: + struct + { + sint32 distance; + raLivenessRange* largestHoleSubrange; + }localRangeHoleCutting; +}; + +class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy +{ + // split current range (this is generally only a good choice when the current range is long but has few usages) + public: + void Reset() + { + ResetCost(); + availableRegisterHole.distance = -1; + availableRegisterHole.physRegister = -1; + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs) + { + sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + + raInstructionEdge currentRangeStart = currentRange->interval2.start; + cemu_assert_debug(strategyCost == INT_MAX); + availableRegisterHole.distance = -1; + availableRegisterHole.physRegister = -1; + if (currentRangeStart.GetRaw() >= 0) + { + if (localAvailableRegsMask.HasAnyAvailable()) + { + sint32 physRegItr = -1; + while (true) + { + physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1); + if (physRegItr < 0) + break; + if(!allowedRegs.IsAvailable(physRegItr)) + continue; + // get size of potential hole for this register + sint32 distance = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, currentRangeStart, physRegItr); + + // some instructions may require the same register for another range, check the distance here + sint32 distUntilFixedReg = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, currentRangeStart, distance, currentRange->GetVirtualRegister(), physRegItr); + if(distUntilFixedReg < distance) + distance = distUntilFixedReg; + + IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); + if (distance < 2) + continue; + // calculate additional cost due to split + cemu_assert_debug(distance < 
requiredSize2); // should always be true otherwise previous step would have selected this register? + sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); + // add small additional cost for the remaining range (prefer larger holes) + cost += ((requiredSize2 - distance) / 2) / 10; + if (cost < strategyCost) + { + strategyCost = cost; + availableRegisterHole.distance = distance; + availableRegisterHole.physRegister = physRegItr; + } + } + } + } + } + + void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + cemu_assert_debug(strategyCost != INT_MAX); + raInstructionEdge currentRangeStart = currentRange->interval2.start; + // use available register + raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true); + if(tailRange) + { + cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers + tailRange->UnsetPhysicalRegister(); + } + } + + private: + struct + { + sint32 physRegister; + sint32 distance; // size of hole + }availableRegisterHole; +}; + +class RASpillStrategy_ExplodeRange : public RASpillStrategy +{ +public: + void Reset() + { + ResetCost(); + explodeRange.range = nullptr; + explodeRange.distance = -1; + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) + { + raInstructionEdge currentRangeStart = currentRange->interval2.start; + if(currentRangeStart.ConnectsToPreviousSegment()) + currentRangeStart.Set(0, true); + sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + cemu_assert_debug(strategyCost == INT_MAX); + explodeRange.range = nullptr; + explodeRange.distance = -1; + for (auto candidate : timeline.activeRanges) + { + if (!candidate->interval2.ExtendsIntoNextSegment()) + continue; + // new checks (Oct 2024): + if(candidate == currentRange) + continue; + if(candidate->GetPhysicalRegister() < 0) + continue; + if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + continue; + + sint32 distance = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart); + IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); + if( distance < 2) + continue; + sint32 cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); + // if the hole is not large enough, add cost of splitting current subrange + if (distance < requiredSize2) + { + cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); + // add small additional cost for the remaining range (prefer larger holes) + cost += ((requiredSize2 - distance) / 2) / 10; + } + // compare with current best candidate for this strategy + if (cost < strategyCost) + { + strategyCost = cost; + explodeRange.distance = distance; + explodeRange.range = candidate; + } + } + } + + void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + raInstructionEdge currentRangeStart = currentRange->interval2.start; + if(currentRangeStart.ConnectsToPreviousSegment()) + currentRangeStart.Set(0, true); + sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + // explode range + PPCRecRA_explodeRange(nullptr, explodeRange.range); + // split current subrange if necessary + if( requiredSize2 > explodeRange.distance) + { + raLivenessRange* tailRange = 
PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart+explodeRange.distance, true); + if(tailRange) + { + cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers + tailRange->UnsetPhysicalRegister(); + } + } + } + +private: + struct + { + raLivenessRange* range; + sint32 distance; // size of hole + // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange + }explodeRange; +}; + + +class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy +{ +public: + void Reset() + { + ResetCost(); + explodeRange.range = nullptr; + explodeRange.distance = -1; + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) + { + // explode the range with the least cost + cemu_assert_debug(strategyCost == INT_MAX); + cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1); + for(auto candidate : timeline.activeRanges) + { + if (!candidate->interval2.ExtendsIntoNextSegment()) + continue; + // only select candidates that clash with current subrange + if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange) + continue; + // and also filter any that dont meet fixed register requirements + if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + continue; + sint32 cost; + cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); + // compare with current best candidate for this strategy + if (cost < strategyCost) + { + strategyCost = cost; + explodeRange.distance = INT_MAX; + explodeRange.range = candidate; + } + } + // add current range as a candidate too + sint32 ownCost; + ownCost = PPCRecRARange_estimateCostAfterRangeExplode(currentRange); + if (ownCost < strategyCost) + { + strategyCost = ownCost; + explodeRange.distance = INT_MAX; + explodeRange.range = currentRange; + } + } + + void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + cemu_assert_debug(strategyCost != INT_MAX); + PPCRecRA_explodeRange(ctx, explodeRange.range); + } + +private: + struct + { + raLivenessRange* range; + sint32 distance; // size of hole + // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange + }explodeRange; +}; + +// filter any registers from candidatePhysRegSet which cannot be used by currentRange due to fixed register requirements within the range that it occupies +void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet) +{ + IMLSegment* seg = currentRange->imlSegment; + if(seg->imlList.empty()) + return; // there can be no fixed register requirements if there are no instructions + + raInstructionEdge firstPos = currentRange->interval2.start; + if(currentRange->interval2.start.ConnectsToPreviousSegment()) + firstPos.SetRaw(0); + else if(currentRange->interval2.start.ConnectsToNextSegment()) + firstPos.Set(seg->imlList.size()-1, false); + + raInstructionEdge lastPos = currentRange->interval2.end; + if(currentRange->interval2.end.ConnectsToPreviousSegment()) + lastPos.SetRaw(0); + else if(currentRange->interval2.end.ConnectsToNextSegment()) + lastPos.Set(seg->imlList.size()-1, false); + cemu_assert_debug(firstPos <= lastPos); + + 
IMLRegID ourRegId = currentRange->GetVirtualRegister(); + + IMLFixedRegisters fixedRegs; + if(firstPos.IsOnOutputEdge()) + GetInstructionFixedRegisters(seg->imlList.data()+firstPos.GetInstructionIndex(), fixedRegs); + for(raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos) + { + if(currentPos.IsOnInputEdge()) + { + GetInstructionFixedRegisters(seg->imlList.data()+currentPos.GetInstructionIndex(), fixedRegs); + } + auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; + for(auto& fixedRegLoc : fixedRegAccess) + { + if(fixedRegLoc.reg.GetRegID() != ourRegId) + candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet); + } + } +} + +// filter out any registers along the range cluster +void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet) +{ + cemu_assert_debug(currentRange->imlSegment == imlSegment); + if(currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) + { + auto clusterRanges = currentRange->GetAllSubrangesInCluster(); + for(auto& rangeIt : clusterRanges) + { + IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, rangeIt, candidatePhysRegSet); + if(!candidatePhysRegSet.HasAnyAvailable()) + break; + } + return; + } + IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet); +} + +void __DebugTestA(IMLSegment* imlSegment) +{ + // iterate all ranges + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while(subrangeItr) + { + if(!subrangeItr->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(subrangeItr->HasPhysicalRegister()); + } + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } } bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { + DbgVerifyAllRanges(ctx); // sort subranges ascending by start index _sortSegmentAllSubrangesLinkedList(imlSegment); IMLRALivenessTimeline livenessTimeline; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raInstructionEdge lastInstructionEdge; + lastInstructionEdge.SetRaw(RA_INTER_RANGE_END); + + struct + { + RASpillStrategy_LocalRangeHoleCutting localRangeHoleCutting; + RASpillStrategy_AvailableRegisterHole availableRegisterHole; + RASpillStrategy_ExplodeRange explodeRange; + // for ranges that connect to follow up segments: + RASpillStrategy_ExplodeRangeInter explodeRangeInter; + }strategy; + + sint32 dbgIndex = 0; while(subrangeItr) { - sint32 currentIndex = subrangeItr->start.index; + raInstructionEdge currentRangeStart = subrangeItr->interval2.start; // used to be currentIndex before refactor PPCRecRA_debugValidateSubrange(subrangeItr); - livenessTimeline.ExpireRanges(std::min(currentIndex, RA_INTER_RANGE_END-1)); // expire up to currentIndex (inclusive), but exclude infinite ranges + + // below used to be: std::min(currentIndex, RA_INTER_RANGE_END-1) + livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges + // note: The logic here is complicated in regards to whether the instruction index should be inclusive or exclusive. Find a way to simplify? 
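The interval comparisons in this pass are easier to follow once the edge numbering is spelled out: the reworked allocator addresses the boundaries between instructions rather than the instructions themselves, and judging from the raw accessors used elsewhere in this patch (currentEdge.Set(i, false) for an output edge, curEdge.SetRaw(i*2+1+1) to step onto the next input edge), instruction i appears to own input edge 2*i and output edge 2*i+1. Below is a minimal standalone sketch of that numbering convention only; EdgeModel is a made-up name, not the actual raInstructionEdge type.

#include <cassert>
#include <cstdint>

// Simplified model of the instruction-edge numbering assumed above (illustrative only).
struct EdgeModel
{
	int32_t raw; // 2*i = input edge of instruction i, 2*i+1 = output edge of instruction i
	static EdgeModel Input(int32_t instrIndex) { return {instrIndex * 2}; }
	static EdgeModel Output(int32_t instrIndex) { return {instrIndex * 2 + 1}; }
	bool IsOnInputEdge() const { return (raw & 1) == 0; }
	bool IsOnOutputEdge() const { return (raw & 1) != 0; }
	int32_t GetInstructionIndex() const { return raw >> 1; }
};

int main()
{
	EdgeModel in1 = EdgeModel::Input(1);   // raw == 2, the point where instruction 1 reads its operands
	EdgeModel out1 = EdgeModel::Output(1); // raw == 3, the point where instruction 1 writes its result
	assert(in1.raw < out1.raw);
	assert(in1.IsOnInputEdge() && out1.IsOnOutputEdge());
	// the i*2+1+1 step used later in this patch lands one past the output edge of i,
	// i.e. on the input edge of instruction i+1, so ranges ending there expire before i+1 reads its inputs
	assert(out1.raw + 1 == EdgeModel::Input(2).raw);
	return 0;
}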
+ // if subrange already has register assigned then add it to the active list and continue if (subrangeItr->GetPhysicalRegister() >= 0) { @@ -402,241 +1096,106 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } + // ranges with fixed register requirements should already have a phys register assigned + if(!subrangeItr->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(subrangeItr->HasPhysicalRegister()); + } // find free register for current subrangeItr and segment IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister()); - IMLPhysRegisterSet physRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat); - cemu_assert_debug(physRegSet.HasAnyAvailable()); // register uses type with no valid pool + IMLPhysRegisterSet candidatePhysRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat); + cemu_assert_debug(candidatePhysRegSet.HasAnyAvailable()); // no valid pool provided for this register type + + IMLPhysRegisterSet allowedRegs = subrangeItr->GetAllowedRegisters(candidatePhysRegSet); + cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this in the initial pass to keep the code here simpler) + candidatePhysRegSet &= allowedRegs; + + __DebugTestA(imlSegment); for (auto& liverangeItr : livenessTimeline.activeRanges) { - cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0); - physRegSet.SetReserved(liverangeItr->GetPhysicalRegister()); + cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0); + candidatePhysRegSet.SetReserved(liverangeItr->GetPhysicalRegister()); + } + // check intersections with other ranges and determine allowed registers + IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments) + if(candidatePhysRegSet.HasAnyAvailable()) + { + // check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments) + PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet); + } + // some target instructions may enforce specific registers (e.g. 
common on X86 where something like SHL , CL forces CL as the count register) + // we determine the list of allowed registers here + // this really only works if we assume single-register requirements (otherwise it's better not to filter out early and instead allow register corrections later but we don't support this yet) + if (candidatePhysRegSet.HasAnyAvailable()) + { + IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet); + } + if(candidatePhysRegSet.HasAnyAvailable()) + { + // use free register + subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg()); + livenessTimeline.AddActiveRange(subrangeItr); + subrangeItr = subrangeItr->link_allSegmentRanges.next; // next + continue; + } + __DebugTestA(imlSegment); + // there is no free register for the entire range + // evaluate different strategies of splitting ranges to free up another register or shorten the current range + strategy.localRangeHoleCutting.Reset(); + strategy.availableRegisterHole.Reset(); + strategy.explodeRange.Reset(); + // can't assign register + // there might be registers available, we just can't use them due to range conflicts + RASpillStrategy* selectedStrategy = nullptr; + auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) + { + if(newStrategy.GetCost() == INT_MAX) + return; + if(selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost()) + selectedStrategy = &newStrategy; + }; + + if (!subrangeItr->interval2.ExtendsIntoNextSegment()) + { + // range ends in current segment, use local strategies + // evaluate strategy: Cut hole into local subrange + strategy.localRangeHoleCutting.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); + SelectStrategyIfBetter(strategy.localRangeHoleCutting); + // evaluate strategy: Split current range to fit in available holes + // todo - are checks required to avoid splitting on the suffix instruction?
+ strategy.availableRegisterHole.Evaluate(imlSegment, subrangeItr, livenessTimeline, localAvailableRegsMask, allowedRegs); + SelectStrategyIfBetter(strategy.availableRegisterHole); + // evaluate strategy: Explode inter-segment ranges + strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); + SelectStrategyIfBetter(strategy.explodeRange); + __DebugTestA(imlSegment); + } + else // if subrangeItr->interval2.ExtendsIntoNextSegment() + { + strategy.explodeRangeInter.Reset(); + strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); + SelectStrategyIfBetter(strategy.explodeRangeInter); + __DebugTestA(imlSegment); } - // check intersections with other ranges and determine allowed registers - IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments) - if(physRegSet.HasAnyAvailable()) + // choose strategy + if(selectedStrategy) { - // check globally in all segments - PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, physRegSet); + selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr); + __DebugTestA(imlSegment); } - if (!physRegSet.HasAnyAvailable()) + else { - struct - { - // estimated costs and chosen candidates for the different spill strategies - // hole cutting into a local range - struct - { - sint32 distance; - raLivenessRange* largestHoleSubrange; - sint32 cost; // additional cost of choosing this candidate - }localRangeHoleCutting; - // split current range (this is generally only a good choice when the current range is long but rarely used) - struct - { - sint32 cost; - sint32 physRegister; - sint32 distance; // size of hole - }availableRegisterHole; - // explode a inter-segment range (prefer ranges that are not read/written in this segment) - struct - { - raLivenessRange* range; - sint32 cost; - sint32 distance; // size of hole - // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange - }explodeRange; - // todo - add more strategies, make cost estimation smarter (for example, in some cases splitting can have reduced or no cost if read/store can be avoided due to data flow) - }spillStrategies; - // cant assign register - // there might be registers available, we just can't use them due to range conflicts - if (subrangeItr->end.index != RA_INTER_RANGE_END) - { - // range ends in current segment - - // Current algo looks like this: - // 1) Get the size of the largest possible hole that we can cut into any of the live local subranges - // 1.1) Check if the hole is large enough to hold the current subrange - // 2) If yes, cut hole and return false (full retry) - // 3) If no, try to reuse free register (need to determine how large the region is we can use) - // 4) If there is no free register or the range is extremely short go back to step 1+2 but additionally split the current subrange at where the hole ends - - cemu_assert_debug(currentIndex == subrangeItr->start.index); - - sint32 requiredSize = subrangeItr->end.index - subrangeItr->start.index; - // evaluate strategy: Cut hole into local subrange - spillStrategies.localRangeHoleCutting.distance = -1; - spillStrategies.localRangeHoleCutting.largestHoleSubrange = nullptr; - spillStrategies.localRangeHoleCutting.cost = INT_MAX; - if (currentIndex >= 0) - { - for (auto candidate : livenessTimeline.activeRanges) - { - if (candidate->end.index == 
RA_INTER_RANGE_END) - continue; - sint32 distance = PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex); - if (distance < 2) - continue; // not even worth the consideration - // calculate split cost of candidate - sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentIndex + distance); - // calculate additional split cost of currentRange if hole is not large enough - if (distance < requiredSize) - { - cost += PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance); - // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes) - cost += (requiredSize - distance) / 10; - } - // compare cost with previous candidates - if (cost < spillStrategies.localRangeHoleCutting.cost) - { - spillStrategies.localRangeHoleCutting.cost = cost; - spillStrategies.localRangeHoleCutting.distance = distance; - spillStrategies.localRangeHoleCutting.largestHoleSubrange = candidate; - } - } - } - // evaluate strategy: Split current range to fit in available holes - // todo - are checks required to avoid splitting on the suffix instruction? - spillStrategies.availableRegisterHole.cost = INT_MAX; - spillStrategies.availableRegisterHole.distance = -1; - spillStrategies.availableRegisterHole.physRegister = -1; - if (currentIndex >= 0) - { - if (localAvailableRegsMask.HasAnyAvailable()) - { - sint32 physRegItr = -1; - while (true) - { - physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1); - if (physRegItr < 0) - break; - // get size of potential hole for this register - sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, physRegItr); - if (distance < 2) - continue; // not worth consideration - // calculate additional cost due to split - if (distance >= requiredSize) - assert_dbg(); // should not happen or else we would have selected this register - sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance); - // add small additional cost for the remaining range (prefer larger holes) - cost += (requiredSize - distance) / 10; - if (cost < spillStrategies.availableRegisterHole.cost) - { - spillStrategies.availableRegisterHole.cost = cost; - spillStrategies.availableRegisterHole.distance = distance; - spillStrategies.availableRegisterHole.physRegister = physRegItr; - } - } - } - } - // evaluate strategy: Explode inter-segment ranges - spillStrategies.explodeRange.cost = INT_MAX; - spillStrategies.explodeRange.range = nullptr; - spillStrategies.explodeRange.distance = -1; - for (auto candidate : livenessTimeline.activeRanges) - { - if (candidate->end.index != RA_INTER_RANGE_END) - continue; - sint32 distance = PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex); - if( distance < 2) - continue; - sint32 cost; - cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); - // if the hole is not large enough, add cost of splitting current subrange - if (distance < requiredSize) - { - cost += PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance); - // add small additional cost for the remaining range (prefer larger holes) - cost += (requiredSize - distance) / 10; - } - // compare with current best candidate for this strategy - if (cost < spillStrategies.explodeRange.cost) - { - spillStrategies.explodeRange.cost = cost; - spillStrategies.explodeRange.distance = distance; - spillStrategies.explodeRange.range = candidate; - } - } - // choose strategy - if 
(spillStrategies.explodeRange.cost != INT_MAX && spillStrategies.explodeRange.cost <= spillStrategies.localRangeHoleCutting.cost && spillStrategies.explodeRange.cost <= spillStrategies.availableRegisterHole.cost) - { - // explode range - PPCRecRA_explodeRange(ppcImlGenContext, spillStrategies.explodeRange.range); - // split current subrange if necessary - if( requiredSize > spillStrategies.explodeRange.distance) - PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex+spillStrategies.explodeRange.distance, true); - } - else if (spillStrategies.availableRegisterHole.cost != INT_MAX && spillStrategies.availableRegisterHole.cost <= spillStrategies.explodeRange.cost && spillStrategies.availableRegisterHole.cost <= spillStrategies.localRangeHoleCutting.cost) - { - // use available register - PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex + spillStrategies.availableRegisterHole.distance, true); - } - else if (spillStrategies.localRangeHoleCutting.cost != INT_MAX && spillStrategies.localRangeHoleCutting.cost <= spillStrategies.explodeRange.cost && spillStrategies.localRangeHoleCutting.cost <= spillStrategies.availableRegisterHole.cost) - { - // cut hole - PPCRecRA_splitLocalSubrange(ppcImlGenContext, spillStrategies.localRangeHoleCutting.largestHoleSubrange, currentIndex + spillStrategies.localRangeHoleCutting.distance, true); - // split current subrange if necessary - if (requiredSize > spillStrategies.localRangeHoleCutting.distance) - PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex + spillStrategies.localRangeHoleCutting.distance, true); - } - else if (subrangeItr->start.index == RA_INTER_RANGE_START) - { - // alternative strategy if we have no other choice: explode current range - PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr); - } - else - assert_dbg(); - - return false; - } - else - { - // range exceeds segment border - // simple but bad solution -> explode the entire range (no longer allow it to cross segment boundaries) - // better solutions: 1) Depending on the situation, we can explode other ranges to resolve the conflict. 
Thus we should explode the range with the lowest extra cost - // 2) Or we explode the range only partially - // explode the range with the least cost - spillStrategies.explodeRange.cost = INT_MAX; - spillStrategies.explodeRange.range = nullptr; - spillStrategies.explodeRange.distance = -1; - for(auto candidate : livenessTimeline.activeRanges) - { - if (candidate->end.index != RA_INTER_RANGE_END) - continue; - // only select candidates that clash with current subrange - if (candidate->GetPhysicalRegister() < 0 && candidate != subrangeItr) - continue; - - sint32 cost; - cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); - // compare with current best candidate for this strategy - if (cost < spillStrategies.explodeRange.cost) - { - spillStrategies.explodeRange.cost = cost; - spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = candidate; - } - } - // add current range as a candidate too - sint32 ownCost; - ownCost = PPCRecRARange_estimateCostAfterRangeExplode(subrangeItr); - if (ownCost < spillStrategies.explodeRange.cost) - { - spillStrategies.explodeRange.cost = ownCost; - spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = subrangeItr; - } - if (spillStrategies.explodeRange.cost == INT_MAX) - assert_dbg(); // should not happen - PPCRecRA_explodeRange(ppcImlGenContext, spillStrategies.explodeRange.range); - } - return false; + // none of the evaluated strategies can be applied, this should only happen if the segment extends into the next segment(s) for which we have no good strategy + cemu_assert_debug(subrangeItr->interval2.ExtendsPreviousSegment()); + // alternative strategy if we have no other choice: explode current range + PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr); + __DebugTestA(imlSegment); } - // assign register to range - //subrangeItr->SetPhysicalRegister(physRegSet.GetFirstAvailableReg()); - subrangeItr->SetPhysicalRegisterForCluster(physRegSet.GetFirstAvailableReg()); - livenessTimeline.AddActiveRange(subrangeItr); - // next - subrangeItr = subrangeItr->link_allSegmentRanges.next; + // DEBUG BEGIN + DbgVerifyAllRanges(ctx); + dbgIndex++; + // DEBUG END + return false; } return true; } @@ -674,154 +1233,6 @@ void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* } } -inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) -{ - return IMLReg(baseFormat, baseFormat, 0, regId); -} - -void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span loadList) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadList.size()); - for (sint32 i = 0; i < loadList.size(); i++) - { - IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->GetVirtualRegister()]; - cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); - imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->GetPhysicalRegister()), loadList[i]->GetName()); - } -} - -void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span storeList) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeList.size()); - for (size_t i = 0; i < storeList.size(); i++) - { - IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->GetVirtualRegister()]; - cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); - imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->GetName(),
_MakeNativeReg(baseFormat, storeList[i]->GetPhysicalRegister())); - } -} - -void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) -{ - std::unordered_map virtId2PhysRegIdMap; // key = virtual register, value = physical register - IMLRALivenessTimeline livenessTimeline; - sint32 index = 0; - sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; - // load register ranges that are supplied from previous segments - raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - if (subrangeItr->start.index == RA_INTER_RANGE_START) - { - livenessTimeline.AddActiveRange(subrangeItr); -#ifdef CEMU_DEBUG_ASSERT - // load GPR - if (subrangeItr->_noLoad == false) - { - assert_dbg(); - } - // update translation table - cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->GetVirtualRegister())); -#endif - virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); - } - // next - subrangeItr = subrangeItr->link_allSegmentRanges.next; - } - // process instructions - while(index < imlSegment->imlList.size() + 1) - { - // expire ranges - livenessTimeline.ExpireRanges(index); - for (auto& expiredRange : livenessTimeline.GetExpiredRanges()) - { - // update translation table - virtId2PhysRegIdMap.erase(expiredRange->GetVirtualRegister()); - // store GPR if required - // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed - if (expiredRange->hasStore) - { - PPCRecRA_insertGPRStoreInstructions(ctx, imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), {&expiredRange, 1}); - index++; - } - } - - // load new ranges - subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - if (subrangeItr->start.index == index) - { - livenessTimeline.AddActiveRange(subrangeItr); - // load GPR - // similar to stores, any loads for the next segment need to happen before the suffix instruction - // however, ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. 
RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange) - // this is to prevent the RA from inserting store/load instructions after the suffix instruction - if (imlSegment->HasSuffixInstruction()) - { - cemu_assert_debug(subrangeItr->start.index <= imlSegment->GetSuffixInstructionIndex()); - } - if (subrangeItr->_noLoad == false) - { - PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), {&subrangeItr , 1}); - index++; - subrangeItr->start.index--; - } - // update translation table - virtId2PhysRegIdMap.insert_or_assign(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); - } - subrangeItr = subrangeItr->link_allSegmentRanges.next; - } - // rewrite registers - if (index < imlSegment->imlList.size()) - imlSegment->imlList[index].RewriteGPR(virtId2PhysRegIdMap); - // next iml instruction - index++; - } - // expire infinite subranges (subranges which cross the segment border) - std::vector loadStoreList; - livenessTimeline.ExpireRanges(RA_INTER_RANGE_END); - for (auto liverange : livenessTimeline.GetExpiredRanges()) - { - // update translation table - virtId2PhysRegIdMap.erase(liverange->GetVirtualRegister()); - // store GPR - if (liverange->hasStore) - loadStoreList.emplace_back(liverange); - } - cemu_assert_debug(livenessTimeline.activeRanges.empty()); - if (!loadStoreList.empty()) - PPCRecRA_insertGPRStoreInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList); - // load subranges for next segments - subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - loadStoreList.clear(); - while(subrangeItr) - { - if (subrangeItr->start.index == RA_INTER_RANGE_END) - { - livenessTimeline.AddActiveRange(subrangeItr); - // load GPR - if (subrangeItr->_noLoad == false) - loadStoreList.emplace_back(subrangeItr); - // update translation table - virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); - } - // next - subrangeItr = subrangeItr->link_allSegmentRanges.next; - } - if (!loadStoreList.empty()) - PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList); -} - -void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx) -{ - for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) - { - IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; - IMLRA_GenerateSegmentMoveInstructions(ctx, imlSegment); - } -} - void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) { // insert empty segments after every non-taken branch if the linked segment has more than one input @@ -937,7 +1348,15 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx #ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr); #endif - raLivenessRange* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, imlSegment, vGPR, name, abstractRange->usageStart, abstractRange->usageEnd); + cemu_assert_debug( + (abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) || + abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger + sint32 inclusiveEnd = abstractRange->usageEnd; + if(inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END) + inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of 
the interval passed to createSubrange is inclusive + raInterval interval; + interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true); + raLivenessRange* subrange = PPCRecRA_createSubrange2(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end); // traverse forward if (abstractRange->usageEnd == RA_INTER_RANGE_END) { @@ -948,7 +1367,7 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx { subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name); subrange->subrangeBranchTaken->previousRanges.push_back(subrange); - cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); + cemu_assert_debug(subrange->subrangeBranchTaken->interval2.ExtendsPreviousSegment()); } } if (imlSegment->nextSegmentBranchNotTaken) @@ -958,7 +1377,7 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx { subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name); subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange); - cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); + cemu_assert_debug(subrange->subrangeBranchNotTaken->interval2.ExtendsPreviousSegment()); } } } @@ -976,19 +1395,33 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx } // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction // this is due to range load instructions being inserted before the suffix instruction - if (subrange->end.index == RA_INTER_RANGE_END) - { - if (imlSegment->HasSuffixInstruction()) - { - cemu_assert_debug(subrange->start.index <= imlSegment->GetSuffixInstructionIndex()); - } - } + // todo - currently later steps might break this assumption, look into this + // if (subrange->interval2.ExtendsIntoNextSegment()) + // { + // if (imlSegment->HasSuffixInstruction()) + // { + // cemu_assert_debug(subrange->interval2.start.GetInstructionIndex() <= imlSegment->GetSuffixInstructionIndex()); + // } + // } return subrange; } // take abstract range data and create LivenessRanges void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { + const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); + + auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) + { + raLivenessRange* subrange = regToSubrange.find(regId)->second; + cemu_assert_debug(subrange); + raFixedRegRequirement tmp; + tmp.pos.Set(instructionIndex, isInput); + tmp.allowedReg = physRegSet; + if(subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos) + subrange->list_fixedRegRequirements.push_back(tmp); + }; + // convert abstract min-max ranges to liveness range objects auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); for (auto& it : segMap) @@ -1001,7 +1434,6 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML // fill created ranges with read/write location indices // note that at this point there is only one range per register per segment // and the algorithm below relies on this - const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); size_t index = 0; IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) @@ -1011,16 
+1443,20 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML IMLRegID gprId = gprReg.GetRegID(); raLivenessRange* subrange = regToSubrange.find(gprId)->second; PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); -#ifdef CEMU_DEBUG_ASSERT - if ((sint32)index < subrange->start.index) + cemu_assert_debug(!subrange->interval2.start.IsInstructionIndex() || subrange->interval2.start.GetInstructionIndex() <= index); + cemu_assert_debug(!subrange->interval2.end.IsInstructionIndex() || subrange->interval2.end.GetInstructionIndex() >= index); + }); + // check fixed register requirements + IMLFixedRegisters fixedRegs; + GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); + for(auto& fixedRegAccess : fixedRegs.listInput) { - IMLRARegAbstractLiveness* dbgAbstractRange = _GetAbstractRange(ctx, imlSegment, gprId); - assert_dbg(); + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); + } + for(auto& fixedRegAccess : fixedRegs.listOutput) + { + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); } - if ((sint32)index + 1 > subrange->end.index) - assert_dbg(); -#endif - }); index++; } } @@ -1190,7 +1626,7 @@ void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSeg } } -void IMLRA_mergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx) +void IMLRA_MergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx) { for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { @@ -1201,7 +1637,7 @@ void IMLRA_mergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx) } } -void IMLRA_extendAbstracRangesOutOfLoops(IMLRegisterAllocatorContext& ctx) +void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx) { for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { @@ -1238,15 +1674,15 @@ void IMLRA_extendAbstracRangesOutOfLoops(IMLRegisterAllocatorContext& ctx) void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) { - IMLRA_mergeCloseAbstractRanges(ctx); - // extra pass to move register stores out of loops - IMLRA_extendAbstracRangesOutOfLoops(ctx); + IMLRA_MergeCloseAbstractRanges(ctx); + // extra pass to move register loads and stores out of loops + IMLRA_ExtendAbstractRangesOutOfLoops(ctx); // calculate liveness ranges for (auto& segIt : ctx.deprGenContext->segmentList2) IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt); } -void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessRange* subrange) +void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange) { bool isRead = false; bool isWritten = false; @@ -1267,7 +1703,7 @@ void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessRange* subrange) subrange->_noLoad = isOverwritten; subrange->hasStore = isWritten; - if (subrange->start.index == RA_INTER_RANGE_START) + if (subrange->interval2.ExtendsPreviousSegment()) subrange->_noLoad = true; } @@ -1294,7 +1730,7 @@ void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, if (subrange->hasStoreDelayed) return; // no need to traverse this subrange IMLSegment* imlSegment = subrange->imlSegment; - if (subrange->end.index != RA_INTER_RANGE_END) + if (!subrange->interval2.ExtendsIntoNextSegment()) { // ending segment if (info->subrangeCount >= SUBRANGE_LIST_SIZE) @@ -1335,9 +1771,9 @@ void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, } } -static void _analyzeRangeDataFlow(raLivenessRange* 
subrange) +static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange) { - if (subrange->end.index != RA_INTER_RANGE_END) + if (!subrange->interval2.ExtendsIntoNextSegment()) return; // analyze data flow across segments (if this segment has writes) if (subrange->hasStore) @@ -1381,47 +1817,312 @@ static void _analyzeRangeDataFlow(raLivenessRange* subrange) void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) { - // this function is called after _assignRegisters(), which means that all liveness ranges are already final and must not be changed anymore - // in the first pass we track read/write dependencies + // this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore + // track read/write dependencies per segment for(auto& seg : ppcImlGenContext->segmentList2) { raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; while(subrange) { - PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); + IMLRA_AnalyzeSubrangeDataDependency(subrange); subrange = subrange->link_allSegmentRanges.next; } } - // then we do a second pass where we scan along subrange flow + // propagate information across segment boundaries for(auto& seg : ppcImlGenContext->segmentList2) { raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; while(subrange) { - _analyzeRangeDataFlow(subrange); + IMLRA_AnalyzeRangeDataFlow(subrange); subrange = subrange->link_allSegmentRanges.next; } } } +/* Generate move instructions */ + +inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) +{ + return IMLReg(baseFormat, baseFormat, 0, regId); +} + +#define DEBUG_RA_INSTRUCTION_GEN 0 + +// prepass for IMLRA_GenerateSegmentMoveInstructions which updates all virtual registers to their physical counterparts +void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + std::unordered_map virtId2PhysReg; + boost::container::small_vector activeRanges; + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + raInstructionEdge currentEdge; + for(size_t i=0; iimlList.size(); i++) + { + currentEdge.Set(i, false); // set to instruction index on output edge + // activate ranges which begin before or during this instruction + while(currentRange && currentRange->interval2.start <= currentEdge) + { + cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict + + virtId2PhysReg[currentRange->GetVirtualRegister()] = currentRange->GetPhysicalRegister(); + activeRanges.push_back(currentRange); + currentRange = currentRange->link_allSegmentRanges.next; + } + // rewrite registers + imlSegment->imlList[i].RewriteGPR(virtId2PhysReg); + // deactivate ranges which end during this instruction + auto it = activeRanges.begin(); + while(it != activeRanges.end()) + { + if((*it)->interval2.end <= currentEdge) + { + virtId2PhysReg.erase((*it)->GetVirtualRegister()); + it = activeRanges.erase(it); + } + else + ++it; + } + } +} + +void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + IMLRA_RewriteRegisters(ctx, imlSegment); + +#if DEBUG_RA_INSTRUCTION_GEN + cemuLog_log(LogType::Force, ""); + cemuLog_log(LogType::Force, "[Seg before RA]"); + IMLDebug_DumpSegment(nullptr, imlSegment, true); +#endif + + bool hadSuffixInstruction = imlSegment->HasSuffixInstruction(); + + 
std::vector rebuiltInstructions; + sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 1 : 0); + + if(imlSegment->imlList.empty()) + { + // empty segments need special handling (todo - look into merging this with the core logic below eventually) + // store all ranges + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + while(currentRange) + { + if(currentRange->hasStore) + rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister())); + currentRange = currentRange->link_allSegmentRanges.next; + } + // load ranges + currentRange = imlSegment->raInfo.linkedList_allSubranges; + while(currentRange) + { + if(!currentRange->_noLoad) + { + cemu_assert_debug(currentRange->interval2.ExtendsIntoNextSegment()); + rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); + } + currentRange = currentRange->link_allSegmentRanges.next; + } + imlSegment->imlList = std::move(rebuiltInstructions); + return; + } + + // make sure that no range exceeds the suffix instruction input edge except if they need to be loaded for the next segment (todo - for those, set the start point accordingly?) + { + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + raInstructionEdge edge; + if(imlSegment->HasSuffixInstruction()) + edge.Set(numInstructionsWithoutSuffix, true); + else + edge.Set(numInstructionsWithoutSuffix-1, false); + + while(currentRange) + { + if(!currentRange->interval2.IsNextSegmentOnly() && currentRange->interval2.end > edge) + { + currentRange->interval2.SetEnd(edge); + } + currentRange = currentRange->link_allSegmentRanges.next; + } + } + +#if DEBUG_RA_INSTRUCTION_GEN + cemuLog_log(LogType::Force, ""); + cemuLog_log(LogType::Force, "--- Intermediate liveness info ---"); + { + raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges; + while(dbgRange) + { + cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval2.start.GetDebugString(), dbgRange->interval2.end.GetDebugString()); + dbgRange = dbgRange->link_allSegmentRanges.next; + } + } +#endif + + boost::container::small_vector activeRanges; + // first we add all the ranges that extend from the previous segment, some of these will end immediately at the first instruction so we might need to store them early + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + + // make all ranges active that start on RA_INTER_RANGE_START + while(currentRange && currentRange->interval2.start.ConnectsToPreviousSegment()) + { + activeRanges.push_back(currentRange); + currentRange = currentRange->link_allSegmentRanges.next; + } + // store all ranges that end before the first output edge (includes RA_INTER_RANGE_START) + auto it = activeRanges.begin(); + raInstructionEdge firstOutputEdge; + firstOutputEdge.Set(0, false); + while(it != activeRanges.end()) + { + if( (*it)->interval2.end < firstOutputEdge) + { + raLivenessRange* storedRange = *it; + if(storedRange->hasStore) + rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister())); + it = activeRanges.erase(it); + continue; + } + ++it; + } + + sint32 numInstructions 
= (sint32)imlSegment->imlList.size(); + for(sint32 i=0; iinterval2.start <= curEdge) + { + if(!currentRange->_noLoad) + { + rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); + } + activeRanges.push_back(currentRange); + currentRange = currentRange->link_allSegmentRanges.next; + } + // copy instruction + rebuiltInstructions.push_back(imlSegment->imlList[i]); + // output edge + curEdge.SetRaw(i*2+1+1); + // also store ranges that end on the next input edge, we handle this by adding an extra 1 above + auto it = activeRanges.begin(); + while(it != activeRanges.end()) + { + if( (*it)->interval2.end <= curEdge) + { + // range expires + // we cant erase it from virtId2PhysReg right away because a store might happen before the last use (the +1 thing above) + + + // todo - check hasStore + raLivenessRange* storedRange = *it; + if(storedRange->hasStore) + { + cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix + rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister())); + } + + it = activeRanges.erase(it); + continue; + } + ++it; + } + } + // if there is no suffix instruction we currently need to handle the final loads here + cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); + if(imlSegment->HasSuffixInstruction()) + { + cemu_assert_debug(!currentRange); // currentRange should be NULL? + for(auto& remainingRange : activeRanges) + { + cemu_assert_debug(!remainingRange->hasStore); + } + } + else + { + for(auto& remainingRange : activeRanges) + { + cemu_assert_debug(!remainingRange->hasStore); // this range still needs to be stored + } + while(currentRange) + { + cemu_assert_debug(currentRange->interval2.IsNextSegmentOnly()); + cemu_assert_debug(!currentRange->_noLoad); + rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); + currentRange = currentRange->link_allSegmentRanges.next; + } + } + + imlSegment->imlList = std::move(rebuiltInstructions); + cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); + +#if DEBUG_RA_INSTRUCTION_GEN + cemuLog_log(LogType::Force, ""); + cemuLog_log(LogType::Force, "[Seg after RA]"); + IMLDebug_DumpSegment(nullptr, imlSegment, false); +#endif +} + +void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx) +{ + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; + IMLRA_GenerateSegmentMoveInstructions2(ctx, imlSegment); + } +} + +void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx) +{ + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while(subrangeItr) + { + PPCRecRA_debugValidateSubrange(subrangeItr); + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + } +} + void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam) { IMLRegisterAllocatorContext ctx; ctx.raParam = &raParam; ctx.deprGenContext = ppcImlGenContext; + DbgVerifyAllRanges(ctx); // DEBUG + 
IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); + DbgVerifyAllRanges(ctx); // DEBUG + ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment + DbgVerifyAllRanges(ctx); // DEBUG ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); IMLRA_CalculateLivenessRanges(ctx); + DbgVerifyAllRanges(ctx); // DEBUG IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx); + DbgVerifyAllRanges(ctx); // DEBUG IMLRA_AssignRegisters(ctx, ppcImlGenContext); + DbgVerifyAllRanges(ctx); // DEBUG + + // debug print + //IMLDebug_Dump(ppcImlGenContext, true); + + // debug print + // if (ppcImlGenContext->debug_entryPPCAddress == 0x2BDA9F4) + // { + // IMLDebug_Dump(ppcImlGenContext, true); + // __debugbreak(); + // } IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); IMLRA_GenerateMoveInstructions(ctx); + PPCRecRA_deleteAllRanges(ppcImlGenContext); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index 52b203970..9e5573a6c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -1,6 +1,7 @@ +#pragma once // container for storing a set of register indices -// specifically optimized towards storing physical register indices (expected to be below 64) +// specifically optimized towards storing typical range of physical register indices (expected to be below 64) class IMLPhysRegisterSet { public: @@ -33,11 +34,21 @@ class IMLPhysRegisterSet return *this; } + void RemoveRegisters(const IMLPhysRegisterSet& other) + { + this->m_regBitmask &= ~other.m_regBitmask; + } + bool HasAnyAvailable() const { return m_regBitmask != 0; } + bool HasExactlyOneAvailable() const + { + return m_regBitmask != 0 && (m_regBitmask & (m_regBitmask - 1)) == 0; + } + // returns index of first available register. 
Do not call when HasAnyAvailable() == false uint32 GetFirstAvailableReg() { @@ -59,7 +70,7 @@ class IMLPhysRegisterSet // returns index of next available register (search includes any register index >= startIndex) // returns -1 if there is no more register - sint32 GetNextAvailableReg(sint32 startIndex) + sint32 GetNextAvailableReg(sint32 startIndex) const { if (startIndex >= 64) return -1; @@ -81,6 +92,11 @@ class IMLPhysRegisterSet return regIndex; } + sint32 CountAvailableRegs() const + { + return std::popcount(m_regBitmask); + } + private: uint64 m_regBitmask{ 0 }; }; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 602cdfa77..e58b7888c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -22,7 +22,6 @@ IMLName raLivenessRange::GetName() const void raLivenessRange::SetPhysicalRegister(sint32 physicalRegister) { - cemu_assert_suspicious(); // not used yet this->physicalRegister = physicalRegister; } @@ -68,6 +67,58 @@ boost::container::small_vector raLivenessRange::GetAllSubr return subranges; } +bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters) +{ + if(interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment()) + { + auto clusterRanges = GetAllSubrangesInCluster(); + bool hasAnyRequirement = false; + for(auto& subrange : clusterRanges) + { + if(subrange->list_fixedRegRequirements.empty()) + continue; + allowedRegisters = subrange->list_fixedRegRequirements.front().allowedReg; + hasAnyRequirement = true; + break; + } + if(!hasAnyRequirement) + return false; + for(auto& subrange : clusterRanges) + { + for(auto& fixedRegLoc : subrange->list_fixedRegRequirements) + allowedRegisters &= fixedRegLoc.allowedReg; + } + } + else + { + // local check only, slightly faster + if(list_fixedRegRequirements.empty()) + return false; + allowedRegisters = list_fixedRegRequirements.front().allowedReg; + for(auto& fixedRegLoc : list_fixedRegRequirements) + allowedRegisters &= fixedRegLoc.allowedReg; + } + return true; +} + +IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool) +{ + IMLPhysRegisterSet fixedRegRequirements = regPool; + if(interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment()) + { + auto clusterRanges = GetAllSubrangesInCluster(); + for(auto& subrange : clusterRanges) + { + for(auto& fixedRegLoc : subrange->list_fixedRegRequirements) + fixedRegRequirements &= fixedRegLoc.allowedReg; + } + return fixedRegRequirements; + } + for(auto& fixedRegLoc : list_fixedRegRequirements) + fixedRegRequirements &= fixedRegLoc.allowedReg; + return fixedRegRequirements; +} + void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) { IMLRegID regId = subrange->GetVirtualRegister(); @@ -142,14 +193,19 @@ void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenes MemoryPoolPermanentObjects memPool_livenessSubrange(4096); -raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex) +// startPosition and endPosition are inclusive +raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition) { 
raLivenessRange* range = memPool_livenessSubrange.acquireObj(); range->previousRanges.clear(); - range->list_locations.resize(0); + range->list_locations.clear(); + range->list_fixedRegRequirements.clear(); range->imlSegment = imlSegment; - PPCRecompilerIml_setSegmentPoint(&range->start, imlSegment, startIndex); - PPCRecompilerIml_setSegmentPoint(&range->end, imlSegment, endIndex); + + cemu_assert_debug(startPosition <= endPosition); + range->interval2.start = startPosition; + range->interval2.end = endPosition; + // register mapping range->virtualRegister = virtualRegister; range->name = name; @@ -160,6 +216,7 @@ raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, I range->lastIterationIndex = 0; range->subrangeBranchNotTaken = nullptr; range->subrangeBranchTaken = nullptr; + cemu_assert_debug(range->previousRanges.empty()); range->_noLoad = false; // add to segment linked lists PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range); @@ -172,6 +229,22 @@ void _unlinkSubrange(raLivenessRange* subrange) IMLSegment* imlSegment = subrange->imlSegment; PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, subrange); PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, subrange); + // unlink reverse references + if(subrange->subrangeBranchTaken) + subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange)); + if(subrange->subrangeBranchNotTaken) + subrange->subrangeBranchNotTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange)); + subrange->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1; + subrange->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1; + // remove forward references + for(auto& prev : subrange->previousRanges) + { + if(prev->subrangeBranchTaken == subrange) + prev->subrangeBranchTaken = nullptr; + if(prev->subrangeBranchNotTaken == subrange) + prev->subrangeBranchNotTaken = nullptr; + } + subrange->previousRanges.clear(); } void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) @@ -179,14 +252,9 @@ void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan _unlinkSubrange(subrange); //subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); subrange->list_locations.clear(); - // unlink reverse references - if(subrange->subrangeBranchTaken) - subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange)); - if(subrange->subrangeBranchNotTaken) - subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange)); - PPCRecompilerIml_removeSegmentPoint(&subrange->start); - PPCRecompilerIml_removeSegmentPoint(&subrange->end); + //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start); + //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end); memPool_livenessSubrange.releaseObj(subrange); } @@ -194,9 +262,18 @@ void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan void 
_PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { _unlinkSubrange(subrange); - PPCRecompilerIml_removeSegmentPoint(&subrange->start); - PPCRecompilerIml_removeSegmentPoint(&subrange->end); + //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start); + //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end); memPool_livenessSubrange.releaseObj(subrange); + +// #ifdef CEMU_DEBUG_ASSERT +// // DEBUG BEGIN +// subrange->lastIterationIndex = 0xFFFFFFFE; +// subrange->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1; +// subrange->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1; +// +// // DEBUG END +// #endif } void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) @@ -229,8 +306,8 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan PPCRecRA_debugValidateSubrange(absorbedSubrange); if (subrange->imlSegment != absorbedSubrange->imlSegment) assert_dbg(); - if (subrange->end.index > absorbedSubrange->start.index) - assert_dbg(); + cemu_assert_debug(subrange->interval2.end == absorbedSubrange->interval2.start); + if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken) assert_dbg(); if (subrange == absorbedSubrange) @@ -238,21 +315,45 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan #endif // update references - if(absorbedSubrange->subrangeBranchTaken) - *std::find(absorbedSubrange->subrangeBranchTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange; - if(absorbedSubrange->subrangeBranchNotTaken) - *std::find(absorbedSubrange->subrangeBranchNotTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange; subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken; + absorbedSubrange->subrangeBranchTaken = nullptr; + absorbedSubrange->subrangeBranchNotTaken = nullptr; + if(subrange->subrangeBranchTaken) + *std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange; + if(subrange->subrangeBranchNotTaken) + *std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange; // merge usage locations + // at the merge point both ranges might track the same instruction, we handle this by first merging this duplicate location + if(subrange && absorbedSubrange && !subrange->list_locations.empty() && !absorbedSubrange->list_locations.empty()) + { + if(subrange->list_locations.back().index == absorbedSubrange->list_locations.front().index) + { + subrange->list_locations.back().isRead |= absorbedSubrange->list_locations.front().isRead; + subrange->list_locations.back().isWrite |= absorbedSubrange->list_locations.front().isWrite; + absorbedSubrange->list_locations.erase(absorbedSubrange->list_locations.begin()); // inefficient + } + } for (auto& location : absorbedSubrange->list_locations) { + cemu_assert_debug(subrange->list_locations.empty() || (subrange->list_locations.back().index < location.index)); // todo - sometimes a subrange can contain the same instruction at the merge point if they are covering half of the instruction edge subrange->list_locations.push_back(location); } absorbedSubrange->list_locations.clear(); 
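	// (Editor's annotation, not from the original patch - a worked example of the duplicate-location
	// handling above.) If the head range ends with location {index=7, read} and the absorbed range
	// begins with {index=7, write}, the two boundary entries are first folded into a single
	// {index=7, read+write} entry on the head before the remaining locations are appended, so
	// list_locations stays sorted by instruction index without duplicates.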
+ // merge fixed reg locations +#ifdef CEMU_DEBUG_ASSERT + if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(subrange->list_fixedRegRequirements.back().pos < absorbedSubrange->list_fixedRegRequirements.front().pos); + } +#endif + for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements) + { + subrange->list_fixedRegRequirements.push_back(fixedReg); + } - subrange->end.index = absorbedSubrange->end.index; + subrange->interval2.end = absorbedSubrange->interval2.end; PPCRecRA_debugValidateSubrange(subrange); @@ -262,16 +363,21 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan // remove all inter-segment connections from the range cluster and split it into local ranges (also removes empty ranges) void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange) { + cemu_assert_debug(originRange->interval2.ExtendsPreviousSegment() || originRange->interval2.ExtendsIntoNextSegment()); // only call this on ranges that span multiple segments auto clusterRanges = originRange->GetAllSubrangesInCluster(); for (auto& subrange : clusterRanges) { if (subrange->list_locations.empty()) continue; - raLivenessRange* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1); - // copy locations - for (auto& location : subrange->list_locations) + raInterval interval; + interval.SetInterval(subrange->list_locations.front().index, true, subrange->list_locations.back().index, true); + raLivenessRange* newSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end); + // copy locations and fixed reg indices + newSubrange->list_locations = subrange->list_locations; + newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements; + if(originRange->HasPhysicalRegister()) { - newSubrange->list_locations.push_back(location); + cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement } } // remove subranges @@ -279,82 +385,223 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange } #ifdef CEMU_DEBUG_ASSERT -void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange) +void PPCRecRA_debugValidateSubrange(raLivenessRange* range) { // validate subrange - if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken) + if (range->subrangeBranchTaken && range->subrangeBranchTaken->imlSegment != range->imlSegment->nextSegmentBranchTaken) assert_dbg(); - if (subrange->subrangeBranchNotTaken && subrange->subrangeBranchNotTaken->imlSegment != subrange->imlSegment->nextSegmentBranchNotTaken) + if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->imlSegment != range->imlSegment->nextSegmentBranchNotTaken) assert_dbg(); + + if(range->subrangeBranchTaken || range->subrangeBranchNotTaken) + { + cemu_assert_debug(range->interval2.end.ConnectsToNextSegment()); + } + if(!range->previousRanges.empty()) + { + cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment()); + } + // validate locations + if (!range->list_locations.empty()) + { + 
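		// (Editor's annotation, not from the original patch.) The two bounds checks below rely on
		// GetInstructionIndexEx() passing RA_INTER_RANGE_START/RA_INTER_RANGE_END through unchanged,
		// so locations are only meaningfully constrained on the side(s) of the interval that end on a
		// real instruction edge; ranges extending across the segment border satisfy that side trivially.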
cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx()); + cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx()); + } + } #else -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {} +void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {} #endif -// split subrange at the given index -// After the split there will be two ranges and subranges: +// since locations are per-instruction, but intervals are per-edge, it's possible that locations track reads/writes outside of the range +// this function will remove any outside read/write locations +void IMLRA_FixLocations(raLivenessRange* range) +{ + if(range->list_locations.empty()) + return; + if(range->interval2.start.IsInstructionIndex() && range->interval2.start.GetInstructionIndex() == range->list_locations.front().index) + { + auto& location = range->list_locations.front(); + if(range->interval2.start.IsOnOutputEdge()) + { + location.isRead = false; + if(!location.isRead && !location.isWrite) + range->list_locations.erase(range->list_locations.begin()); + } + } + if(range->list_locations.empty()) + return; + if(range->interval2.end.IsInstructionIndex() && range->interval2.end.GetInstructionIndex() == range->list_locations.back().index) + { + auto& location = range->list_locations.back(); + if(range->interval2.end.IsOnInputEdge()) + { + location.isWrite = false; + if(!location.isRead && !location.isWrite) + range->list_locations.pop_back(); + } + } +} + +// trim start and end of range to match first and last read/write locations +// does not trim start/endpoints which extend into the next/previous segment +void IMLRA_TrimRangeToUse(raLivenessRange* range) +{ + if(range->list_locations.empty()) + { + // special case where we trim ranges extending from other segments to a single instruction edge + cemu_assert_debug(!range->interval2.start.IsInstructionIndex() || !range->interval2.end.IsInstructionIndex()); + if(range->interval2.start.IsInstructionIndex()) + range->interval2.start = range->interval2.end; + if(range->interval2.end.IsInstructionIndex()) + range->interval2.end = range->interval2.start; + return; + } + raInterval prevInterval = range->interval2; + // trim start + if(range->interval2.start.IsInstructionIndex()) + { + bool isInputEdge = range->list_locations.front().isRead; + range->interval2.start.Set(range->list_locations.front().index, isInputEdge); + } + // trim end + if(range->interval2.end.IsInstructionIndex()) + { + bool isOutputEdge = range->list_locations.back().isWrite; + range->interval2.end.Set(range->list_locations.back().index, !isOutputEdge); + } + // extra checks +#ifdef CEMU_DEBUG_ASSERT + cemu_assert_debug(range->interval2.start <= range->interval2.end); + for(auto& loc : range->list_locations) + { + cemu_assert_debug(range->interval2.ContainsInstructionIndex(loc.index)); + } + cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2)); +#endif +} + +// split range at the given position +// After the split there will be two ranges: // head -> subrange is shortened to end at splitIndex (exclusive) // tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange // if head has a physical register assigned it will not carry over to tail -// The return value is the tail subrange -// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations -// Ranges that begin at RA_INTER_RANGE_START are 
allowed and can be split -raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole) +// The return value is the tail range +// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within them +// the range after the split point does not inherit the physical register +// if trimToHole is true and any of the halfes is empty, it will be deleted +raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole) { - // validation + cemu_assert_debug(splitPosition.IsInstructionIndex()); + cemu_assert_debug(!subrange->interval2.IsNextSegmentOnly() && !subrange->interval2.IsPreviousSegmentOnly()); + cemu_assert_debug(subrange->interval2.ContainsEdge(splitPosition)); + // determine new intervals + raInterval headInterval, tailInterval; + headInterval.SetInterval(subrange->interval2.start, splitPosition-1); + tailInterval.SetInterval(splitPosition, subrange->interval2.end); + cemu_assert_debug(headInterval.start <= headInterval.end); + cemu_assert_debug(tailInterval.start <= tailInterval.end); + // create tail + raLivenessRange* tailSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end); + tailSubrange->SetPhysicalRegister(subrange->GetPhysicalRegister()); + // carry over branch targets and update reverse references + tailSubrange->subrangeBranchTaken = subrange->subrangeBranchTaken; + tailSubrange->subrangeBranchNotTaken = subrange->subrangeBranchNotTaken; + subrange->subrangeBranchTaken = nullptr; + subrange->subrangeBranchNotTaken = nullptr; + if(tailSubrange->subrangeBranchTaken) + *std::find(tailSubrange->subrangeBranchTaken->previousRanges.begin(), tailSubrange->subrangeBranchTaken->previousRanges.end(), subrange) = tailSubrange; + if(tailSubrange->subrangeBranchNotTaken) + *std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange; + // we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in case #ifdef CEMU_DEBUG_ASSERT - //if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) - // assert_dbg(); - if (subrange->start.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) - assert_dbg(); - if (subrange->start.index >= splitIndex) - assert_dbg(); - if (subrange->end.index <= splitIndex) - assert_dbg(); + if(!subrange->list_locations.empty()) + { + sint32 curIdx = -1; + for(auto& location : subrange->list_locations) + { + cemu_assert_debug(curIdx < location.index); + curIdx = location.index; + } + } #endif - // create tail - raLivenessRange* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), splitIndex, subrange->end.index); - // copy locations + // split locations + // since there are 2 edges per instruction and locations track both via a single index, locations on the split point might need to be copied into both ranges for (auto& location : subrange->list_locations) { - if (location.index >= splitIndex) + if(tailInterval.ContainsInstructionIndex(location.index)) tailSubrange->list_locations.push_back(location); } 
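	// (Editor's annotation, not from the original patch - a worked example of the boundary case
	// mentioned above.) If splitPosition is the output edge of instruction 6, headInterval ends on 6's
	// input edge and tailInterval starts on 6's output edge, so a read+write location at index 6 ends
	// up in both ranges; the later IMLRA_FixLocations() calls then reduce it to a pure read in the
	// head and a pure write in the tail.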
// remove tail locations from head for (sint32 i = 0; i < subrange->list_locations.size(); i++) { raLivenessLocation_t* location = subrange->list_locations.data() + i; - if (location->index >= splitIndex) + if (!headInterval.ContainsInstructionIndex(location->index)) { subrange->list_locations.resize(i); break; } } - // adjust start/end - if (trimToHole) + // split fixed reg requirements + for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++) { - if (subrange->list_locations.empty()) + raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i; + if (tailInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex())) { - subrange->end.index = subrange->start.index+1; + tailSubrange->list_fixedRegRequirements.push_back(*fixedReg); + } + } + // remove tail fixed reg requirements from head + for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++) + { + raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i; + if (!headInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex())) + { + subrange->list_fixedRegRequirements.resize(i); + break; + } + } + // adjust intervals + subrange->interval2 = headInterval; + tailSubrange->interval2 = tailInterval; + // fix locations to only include read/write edges within the range + if(subrange) + IMLRA_FixLocations(subrange); + if(tailSubrange) + IMLRA_FixLocations(tailSubrange); + // trim to hole + if(trimToHole) + { + if(subrange->list_locations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex())) + { + PPCRecRA_deleteSubrange(ppcImlGenContext, subrange); + subrange = nullptr; } else { - subrange->end.index = subrange->list_locations.back().index + 1; + IMLRA_TrimRangeToUse(subrange); } - if (tailSubrange->list_locations.empty()) + if(tailSubrange->list_locations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex())) { - assert_dbg(); // should not happen? 
(In this case we can just avoid generating a tail at all) + PPCRecRA_deleteSubrange(ppcImlGenContext, tailSubrange); + tailSubrange = nullptr; } else { - tailSubrange->start.index = tailSubrange->list_locations.front().index; + IMLRA_TrimRangeToUse(tailSubrange); } } - else - { - // set head range to end at split index - subrange->end.index = splitIndex; - } + // validation + cemu_assert_debug(!subrange || subrange->interval2.start <= subrange->interval2.end); + cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start <= tailSubrange->interval2.end); + cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start >= splitPosition); + if (!trimToHole) + cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start == splitPosition); + + if(subrange) + PPCRecRA_debugValidateSubrange(subrange); + if(tailSubrange) + PPCRecRA_debugValidateSubrange(tailSubrange); return tailSubrange; } @@ -401,13 +648,13 @@ sint32 PPCRecRARange_estimateTotalCost(std::span ranges) for (auto& subrange : ranges) { - if (subrange->start.index != RA_INTER_RANGE_START) + if (!subrange->interval2.ExtendsPreviousSegment()) { //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment)); readCount++; } - if (subrange->end.index != RA_INTER_RANGE_END) + if (!subrange->interval2.ExtendsIntoNextSegment()) { //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment)); @@ -433,13 +680,14 @@ sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange) return cost; } -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex) +sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition) { // validation #ifdef CEMU_DEBUG_ASSERT - if (subrange->end.index == RA_INTER_RANGE_END) + if (subrange->interval2.ExtendsIntoNextSegment()) assert_dbg(); #endif + cemu_assert_debug(splitPosition.IsInstructionIndex()); sint32 cost = 0; // find split position in location list @@ -448,25 +696,15 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, assert_dbg(); // should not happen? return 0; } - if (splitIndex <= subrange->list_locations.front().index) + sint32 splitInstructionIndex = splitPosition.GetInstructionIndex(); + if (splitInstructionIndex <= subrange->list_locations.front().index) return 0; - if (splitIndex > subrange->list_locations.back().index) + if (splitInstructionIndex > subrange->list_locations.back().index) return 0; // todo - determine exact cost of split subranges cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // currently we assume that the additional region will require a read and a store - //for (sint32 f = 0; f < subrange->list_locations.size(); f++) - //{ - // raLivenessLocation_t* location = subrange->list_locations.data() + f; - // if (location->index >= splitIndex) - // { - // ... 
- // return cost; - // } - //} - return cost; -} - +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index 31deaab37..4467d2f01 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -1,4 +1,5 @@ #pragma once +#include "IMLRegisterAllocator.h" struct raLivenessLocation_t { @@ -18,11 +19,286 @@ struct raLivenessSubrangeLink struct raLivenessRange* next; }; +struct raInstructionEdge +{ + friend struct raInterval; +public: + raInstructionEdge() + { + index = 0; + } + + raInstructionEdge(sint32 instructionIndex, bool isInputEdge) + { + Set(instructionIndex, isInputEdge); + } + + void Set(sint32 instructionIndex, bool isInputEdge) + { + if(instructionIndex == RA_INTER_RANGE_START || instructionIndex == RA_INTER_RANGE_END) + { + index = instructionIndex; + return; + } + index = instructionIndex * 2 + (isInputEdge ? 0 : 1); + cemu_assert_debug(index >= 0 && index < 0x100000*2); // make sure index value is sane + } + + void SetRaw(sint32 index) + { + this->index = index; + cemu_assert_debug(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END || (index >= 0 && index < 0x100000*2)); // make sure index value is sane + } + + // sint32 GetRaw() + // { + // this->index = index; + // } + + std::string GetDebugString() + { + if(index == RA_INTER_RANGE_START) + return "RA_START"; + else if(index == RA_INTER_RANGE_END) + return "RA_END"; + std::string str = fmt::format("{}", GetInstructionIndex()); + if(IsOnInputEdge()) + str += "i"; + else if(IsOnOutputEdge()) + str += "o"; + return str; + } + + sint32 GetInstructionIndex() const + { + cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END); + return index >> 1; + } + + // returns instruction index or RA_INTER_RANGE_START/RA_INTER_RANGE_END + sint32 GetInstructionIndexEx() const + { + if(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END) + return index; + return index >> 1; + } + + sint32 GetRaw() const + { + return index; + } + + bool IsOnInputEdge() const + { + cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END); + return (index&1) == 0; + } + + bool IsOnOutputEdge() const + { + cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END); + return (index&1) != 0; + } + + bool ConnectsToPreviousSegment() const + { + return index == RA_INTER_RANGE_START; + } + + bool ConnectsToNextSegment() const + { + return index == RA_INTER_RANGE_END; + } + + bool IsInstructionIndex() const + { + return index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END; + } + + // comparison operators + bool operator>(const raInstructionEdge& other) const + { + return index > other.index; + } + bool operator<(const raInstructionEdge& other) const + { + return index < other.index; + } + bool operator<=(const raInstructionEdge& other) const + { + return index <= other.index; + } + bool operator>=(const raInstructionEdge& other) const + { + return index >= other.index; + } + bool operator==(const raInstructionEdge& other) const + { + return index == other.index; + } + + raInstructionEdge operator+(sint32 offset) const + { + cemu_assert_debug(IsInstructionIndex()); + cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END); + raInstructionEdge edge; + edge.index = index + offset; + return edge; + } + + raInstructionEdge operator-(sint32 offset) const + { + 
cemu_assert_debug(IsInstructionIndex()); + cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END); + raInstructionEdge edge; + edge.index = index - offset; + return edge; + } + + raInstructionEdge& operator++() + { + cemu_assert_debug(IsInstructionIndex()); + index++; + return *this; + } + +private: + sint32 index; // can also be RA_INTER_RANGE_START or RA_INTER_RANGE_END, otherwise contains instruction index * 2 + +}; + +struct raInterval +{ + raInterval() + { + + } + + raInterval(raInstructionEdge start, raInstructionEdge end) + { + SetInterval(start, end); + } + + // isStartOnInput = Input+Output edge on first instruction. If false then only output + // isEndOnOutput = Input+Output edge on last instruction. If false then only input + void SetInterval(sint32 start, bool isStartOnInput, sint32 end, bool isEndOnOutput) + { + this->start.Set(start, isStartOnInput); + this->end.Set(end, !isEndOnOutput); + } + + void SetInterval(raInstructionEdge start, raInstructionEdge end) + { + cemu_assert_debug(start <= end); + this->start = start; + this->end = end; + } + + void SetStart(const raInstructionEdge& edge) + { + start = edge; + } + + void SetEnd(const raInstructionEdge& edge) + { + end = edge; + } + + sint32 GetStartIndex() const + { + return start.GetInstructionIndex(); + } + + sint32 GetEndIndex() const + { + return end.GetInstructionIndex(); + } + + bool ExtendsPreviousSegment() const + { + return start.ConnectsToPreviousSegment(); + } + + bool ExtendsIntoNextSegment() const + { + return end.ConnectsToNextSegment(); + } + + bool IsNextSegmentOnly() const + { + return start.ConnectsToNextSegment() && end.ConnectsToNextSegment(); + } + + bool IsPreviousSegmentOnly() const + { + return start.ConnectsToPreviousSegment() && end.ConnectsToPreviousSegment(); + } + + // returns true if range is contained within a single segment + bool IsLocal() const + { + return start.GetRaw() > RA_INTER_RANGE_START && end.GetRaw() < RA_INTER_RANGE_END; + } + + bool ContainsInstructionIndex(sint32 instructionIndex) const + { + cemu_assert_debug(instructionIndex != RA_INTER_RANGE_START && instructionIndex != RA_INTER_RANGE_END); + return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx(); + } + + // similar to ContainsInstructionIndex, but allows RA_INTER_RANGE_START/END as input + bool ContainsInstructionIndexEx(sint32 instructionIndex) const + { + if(instructionIndex == RA_INTER_RANGE_START) + return start.ConnectsToPreviousSegment(); + if(instructionIndex == RA_INTER_RANGE_END) + return end.ConnectsToNextSegment(); + return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx(); + } + + bool ContainsEdge(const raInstructionEdge& edge) const + { + return edge >= start && edge <= end; + } + + bool ContainsWholeInterval(const raInterval& other) const + { + return other.start >= start && other.end <= end; + } + + bool IsOverlapping(const raInterval& other) const + { + return start <= other.end && end >= other.start; + } + + sint32 GetPreciseDistance() + { + cemu_assert_debug(!start.ConnectsToNextSegment()); // how to handle this? 
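		// (Editor's annotation, not from the original patch.) Distances are counted in edges, e.g. a
		// local interval from the input edge of instruction 3 (raw index 6) to the output edge of
		// instruction 5 (raw index 11) yields 11 - 6 + 1 = 6 covered edges; a single-edge interval
		// (start == end) therefore reports 1.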
+ if(start == end) + return 1; + cemu_assert_debug(!end.ConnectsToPreviousSegment() && !end.ConnectsToNextSegment()); + if(start.ConnectsToPreviousSegment()) + return end.GetRaw() + 1; + + return end.GetRaw() - start.GetRaw() + 1; // +1 because end is inclusive + } + +//private: not making these directly accessible only forces us to create loads of verbose getters and setters + raInstructionEdge start; + raInstructionEdge end; +}; + +struct raFixedRegRequirement +{ + raInstructionEdge pos; + IMLPhysRegisterSet allowedReg; +}; + struct raLivenessRange { IMLSegment* imlSegment; - IMLSegmentPoint start; - IMLSegmentPoint end; + raInterval interval2; + // dirty state tracking bool _noLoad; bool hasStore; @@ -34,28 +310,34 @@ struct raLivenessRange boost::container::small_vector previousRanges; // processing uint32 lastIterationIndex; - // instruction locations + // instruction read/write locations std::vector list_locations; + // ordered list of all raInstructionEdge indices which require a fixed register + std::vector list_fixedRegRequirements; // linked list (subranges with same GPR virtual register) raLivenessSubrangeLink link_sameVirtualRegister; // linked list (all subranges for this segment) raLivenessSubrangeLink link_allSegmentRanges; - // register mapping (constant) + // register info IMLRegID virtualRegister; IMLName name; // register allocator result sint32 physicalRegister; boost::container::small_vector GetAllSubrangesInCluster(); + bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false in which case allowedRegisters is left undefined + IMLPhysRegisterSet GetAllowedRegisters(IMLPhysRegisterSet regPool); // return regPool with fixed register requirements filtered out IMLRegID GetVirtualRegister() const; sint32 GetPhysicalRegister() const; + bool HasPhysicalRegister() const { return physicalRegister >= 0; } IMLName GetName() const; void SetPhysicalRegister(sint32 physicalRegister); void SetPhysicalRegisterForCluster(sint32 physicalRegister); + void UnsetPhysicalRegister() { physicalRegister = -1; } }; -raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex); +raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition); void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange); void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); @@ -63,7 +345,7 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange); -raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole = false); +raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole = false); void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite); void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); @@ -71,8 +353,5 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* 
subrange); // cost estimation sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment); sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange); -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex); - -// special values to mark the index of ranges that reach across the segment border -#define RA_INTER_RANGE_START (-1) -#define RA_INTER_RANGE_END (0x70000000) +//sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex); +sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 0589d6603..10e3dc069 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -3,12 +3,121 @@ #include +// special values to mark the index of ranges that reach across the segment border +#define RA_INTER_RANGE_START (-1) +#define RA_INTER_RANGE_END (0x70000000) + struct IMLSegmentPoint { + friend struct IMLSegmentInterval; + sint32 index; - struct IMLSegment* imlSegment; + struct IMLSegment* imlSegment; // do we really need to track this? SegmentPoints are always accessed via the segment that they are part of IMLSegmentPoint* next; IMLSegmentPoint* prev; + + // the index is the instruction index times two. + // this gives us the ability to cover half an instruction with RA ranges + // covering only the first half of an instruction (0-0) means that the register is read, but not preserved + // covering first and the second half means the register is read and preserved + // covering only the second half means the register is written but not read + + sint32 GetInstructionIndex() const + { + return index; + } + + void SetInstructionIndex(sint32 index) + { + this->index = index; + } + + void ShiftIfAfter(sint32 instructionIndex, sint32 shiftCount) + { + if (!IsPreviousSegment() && !IsNextSegment()) + { + if (GetInstructionIndex() >= instructionIndex) + index += shiftCount; + } + } + + void DecrementByOneInstruction() + { + index--; + } + + // the segment point can point beyond the first and last instruction which indicates that it is an infinite range reaching up to the previous or next segment + bool IsPreviousSegment() const { return index == RA_INTER_RANGE_START; } + bool IsNextSegment() const { return index == RA_INTER_RANGE_END; } + + // overload operand > and < + bool operator>(const IMLSegmentPoint& other) const { return index > other.index; } + bool operator<(const IMLSegmentPoint& other) const { return index < other.index; } + bool operator==(const IMLSegmentPoint& other) const { return index == other.index; } + bool operator!=(const IMLSegmentPoint& other) const { return index != other.index; } + + // overload comparison operands for sint32 + bool operator>(const sint32 other) const { return index > other; } + bool operator<(const sint32 other) const { return index < other; } + bool operator<=(const sint32 other) const { return index <= other; } + bool operator>=(const sint32 other) const { return index >= other; } +}; + +struct IMLSegmentInterval +{ + IMLSegmentPoint start; + IMLSegmentPoint end; + + bool ContainsInstructionIndex(sint32 offset) const { return start <= offset && end > offset; } + + bool IsRangeOverlapping(const IMLSegmentInterval& other) + { + // todo - compare the raw index + sint32 r1start = 
this->start.GetInstructionIndex(); + sint32 r1end = this->end.GetInstructionIndex(); + sint32 r2start = other.start.GetInstructionIndex(); + sint32 r2end = other.end.GetInstructionIndex(); + if (r1start < r2end && r1end > r2start) + return true; + if (this->start.IsPreviousSegment() && r1start == r2start) + return true; + if (this->end.IsNextSegment() && r1end == r2end) + return true; + return false; + } + + bool ExtendsIntoPreviousSegment() const + { + return start.IsPreviousSegment(); + } + + bool ExtendsIntoNextSegment() const + { + return end.IsNextSegment(); + } + + bool IsNextSegmentOnly() const + { + if(!start.IsNextSegment()) + return false; + cemu_assert_debug(end.IsNextSegment()); + return true; + } + + bool IsPreviousSegmentOnly() const + { + if (!end.IsPreviousSegment()) + return false; + cemu_assert_debug(start.IsPreviousSegment()); + return true; + } + + sint32 GetDistance() const + { + // todo - assert if either start or end is outside the segment + // we may also want to switch this to raw indices? + return end.GetInstructionIndex() - start.GetInstructionIndex(); + } }; struct PPCSegmentRegisterAllocatorInfo_t diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 846426f58..db48b9c05 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -18,6 +18,8 @@ #include "BackendX64/BackendX64.h" #include "util/highresolutiontimer/HighResolutionTimer.h" +#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock + struct PPCInvalidationRange { MPTR startAddress; @@ -41,11 +43,36 @@ void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)(); PPCRecompilerInstanceData_t* ppcRecompilerInstanceData; +#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION +static std::mutex s_singleRecompilationMutex; +#endif + bool ppcRecompilerEnabled = false; +void PPCRecompiler_recompileAtAddress(uint32 address); + // this function does never block and can fail if the recompiler lock cannot be acquired immediately void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress) { +#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION + if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited) + return; + PPCRecompilerState.recompilerSpinlock.lock(); + if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited) + { + PPCRecompilerState.recompilerSpinlock.unlock(); + return; + } + ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited; + PPCRecompilerState.recompilerSpinlock.unlock(); + s_singleRecompilationMutex.lock(); + if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] == PPCRecompiler_leaveRecompilerCode_visited) + { + PPCRecompiler_recompileAtAddress(enterAddress); + } + s_singleRecompilationMutex.unlock(); + return; +#endif // quick read-only check without lock if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited) return; @@ -154,6 +181,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP } } + // if(range.startAddress < 0x0202fa3C || range.startAddress > 0x0202FA7C) + // return nullptr; // DEBUG + PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t(); 
ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; @@ -182,6 +212,85 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } } + // DEBUG BEGIN + // if(ppcRecFunc->ppcAddress != 0x2BDA9F4) // TP + // { + // delete ppcRecFunc; + // return nullptr; + // } + // if(ppcRecFunc->ppcAddress < 0x2BDA9F4) // TP + // { + // delete ppcRecFunc; + // return nullptr; + // } + + // this prevents the crashing + // if((ppcRecFunc->ppcAddress >= 0x02ade400 && ppcRecFunc->ppcAddress < 0x02ade600)) -> no crash + //if((ppcRecFunc->ppcAddress >= 0x02ade500 && ppcRecFunc->ppcAddress < 0x02ade600)) -> no crash + // if((ppcRecFunc->ppcAddress >= 0x02ade580 && ppcRecFunc->ppcAddress < 0x02ade600)) // -> crashed around 0x0x2b874b0 (but rare? Out of 5 runs it only crashed once) + // { + // delete ppcRecFunc; + // return nullptr; + // } + // the problem with Shovel Knight is that the crash seems to be pretty instable, at least when trying to narrow it down. Lets look for another game for now + + // check TP bug... + // if(ppcRecFunc->ppcAddress >= 0x03000000) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02800000) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02C00000) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02A00000) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B00000) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B80000) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B40000) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B60000) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B70000) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B68000) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B64000) -> no bug (I went into wrong direction) + // if(ppcRecFunc->ppcAddress >= 0x02B6C000) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B6A000) -> has bug (double checked, it has bug) + // if(ppcRecFunc->ppcAddress >= 0x02B6B000) -> has bug (I went into the wrong direction again? Or does A000 have no bug?? + // if(ppcRecFunc->ppcAddress >= 0x02B69000) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B68800) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B68400) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B68600) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B68500) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B68580) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B685C0) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B685A0) -> has bug + // if(ppcRecFunc->ppcAddress >= 0x02B68590) -> no bug + // if(ppcRecFunc->ppcAddress >= 0x02B68598) -> has bug + + // if(ppcRecFunc->ppcAddress != 0x02B68594) -> seems fine. No bug (against the expectation) + // if(ppcRecFunc->ppcAddress == 0x02B68594) -> Still has the bug + + // if(ppcRecFunc->ppcAddress == 0x02B68594) + // { + // delete ppcRecFunc; + // return nullptr; + // } + // if(ppcRecFunc->ppcAddress >= 0x2B7A8D4 && ppcRecFunc->ppcAddress < 0x02B7AC9C && ppcRecFunc->ppcAddress != 0x2B7A8D4) + // { + // delete ppcRecFunc; + // return nullptr; + // } + // doing both of these means no bug! + // excluding just ppcAddress == 0x2B7A8D4 is enough to trigger the bug again. So it definitely that function + // next: Debug it! + + // In Pikmin 3 030a9998 is broken? 
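	// (Editor's annotation, not from the original patch.) The commented-out address checks above and
	// below are the author's bisection log: individual functions or address ranges were excluded from
	// recompilation (by returning nullptr early) to narrow down which recompiled function caused a
	// given in-game regression. They are retained here as development notes and have no effect on the
	// emitted code.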
+ // if(!(ppcRecFunc->ppcAddress >= 0x030a9998 && ppcRecFunc->ppcAddress < 0x030AA208)) + // { + // delete ppcRecFunc; + // return nullptr; + // } + // else + // { + // delete ppcRecFunc; + // return nullptr; + // } + + // DEBUG END + // apply passes if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext)) @@ -190,13 +299,58 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } - //if (ppcRecFunc->ppcAddress == 0x30DF5F8) - //{ - // debug_printf("----------------------------------------\n"); - // IMLDebug_Dump(&ppcImlGenContext); - // __debugbreak(); - //} - + // TP + // if (ppcRecFunc->ppcAddress == 0x2B7A8D4) + // { + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // //__debugbreak(); + // } + // // Bad Function in SM3DW + // if (ppcRecFunc->ppcAddress == 0x023D5768) + // { + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // } + // if (ppcRecFunc->ppcAddress >= 0x023D5768 && ppcRecFunc->ppcAddress < 0x023D58DC) + // { + // delete ppcRecFunc; + // return nullptr; + // } + // + + // + // // 0x02846c74 + // if (ppcRecFunc->ppcAddress == 0x02846c74) + // { + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + // } + + // Shovel Knight + // if (ppcRecFunc->ppcAddress >= 0x02A1E630 && ppcRecFunc->ppcAddress < 0x02A1E9D8) + // { + // // debug_printf("----------------------------------------\n"); + // // IMLDebug_Dump(&ppcImlGenContext); + // // __debugbreak(); + // delete ppcRecFunc; + // return nullptr; + // } + // + // // + // if (ppcRecFunc->ppcAddress == 0x02ade5c4 || ppcRecFunc->ppcAddress == 0x02ade5c8) + // { + // // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + // } + + // else + // { + // delete ppcRecFunc; + // return nullptr; + // } //if (ppcRecFunc->ppcAddress == 0x11223344) //{ @@ -210,14 +364,26 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // return nullptr; //} - //if (ppcRecFunc->ppcAddress == 0x03C26844) - //{ - // __debugbreak(); - // IMLDebug_Dump(&ppcImlGenContext); - // __debugbreak(); - //} + // if (ppcRecFunc->ppcAddress >= 0x2BDA9F4 && ppcRecFunc->ppcAddress < 0x02BDAB38) + // { + // return nullptr; + // //IMLDebug_Dump(&ppcImlGenContext); + // //__debugbreak(); + // } + + // if (ppcRecFunc->ppcAddress == 0x2BDA9F4) + // { + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + // } // 31A8778 + // if(ppcRecFunc->ppcAddress >= 0x2759E20 && ppcRecFunc->ppcAddress < 0x0275A0CC) + // { + // delete ppcRecFunc; + // return nullptr; + // } + // Functions for testing (botw): // 3B4049C (large with switch case) // 30BF118 (has a bndz copy loop + some float instructions at the end) @@ -231,6 +397,14 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + if (ppcRecFunc->ppcAddress == 0x2B7A8D4) + { + // write code to binary file + FILE* f = fopen("ppcRecFunc_2B7A8D4.bin", "wb"); + fwrite(ppcRecFunc->x86Code, 1, ppcRecFunc->x86Size, f); + fclose(f); + } + // collect list of PPC-->x64 entry points entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) @@ -255,7 +429,7 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP codeHash += ((uint8*)ppcRecFunc->x86Code)[i]; } - //cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> 
x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash); + cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash); return ppcRecFunc; } @@ -323,11 +497,14 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); -// if(ppcImlGenContext.debug_entryPPCAddress == 0x0200E1E8) -// { -// IMLDebug_Dump(&ppcImlGenContext); -// __debugbreak(); -// } + + // if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8 && ppcImlGenContext.debug_entryPPCAddress < 0x0240C0AC) + // { + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + // } + // else if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8) + // return false; return true; } @@ -438,6 +615,10 @@ std::atomic_bool s_recompilerThreadStopSignal{false}; void PPCRecompiler_thread() { SetThreadName("PPCRecompiler"); +#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION + return; +#endif + while (true) { if(s_recompilerThreadStopSignal) @@ -765,4 +946,4 @@ void PPCRecompiler_Shutdown() // mark as unmapped ppcRecompiler_reservedBlockMask[i] = false; } -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index b89b7f7c7..b637b594b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1746,7 +1746,7 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index) { segmentPoint->imlSegment = imlSegment; - segmentPoint->index = index; + segmentPoint->SetInstructionIndex(index); if (imlSegment->segmentPointList) imlSegment->segmentPointList->prev = segmentPoint; segmentPoint->prev = nullptr; @@ -1766,7 +1766,7 @@ void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint) /* * Insert multiple no-op instructions -* Warning: Can invalidate any previous instruction structs from the same segment +* Warning: Can invalidate any previous instruction pointers from the same segment */ void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount) { @@ -1788,12 +1788,7 @@ void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, IMLSegmentPoint* segmentPoint = imlSegment->segmentPointList; while (segmentPoint) { - if (segmentPoint->index != RA_INTER_RANGE_START && segmentPoint->index != RA_INTER_RANGE_END) - { - if (segmentPoint->index >= index) - segmentPoint->index += shiftBackCount; - } - // next + segmentPoint->ShiftIfAfter(index, shiftBackCount); segmentPoint = segmentPoint->next; } } @@ -2864,6 +2859,76 @@ bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBloc { uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; + + // DEBUG BEGIN + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A8D4+0x10) -> stops bug + // 
if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A9C0) -> has bug (optional code path) + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AA50) -> stops bug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC34) -> stops bug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC78) -> has bug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC70) -> has bug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC88) -> has bug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC3C) -> has bug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC38) -> no bug + // weirdly, excluding 0x02B7AC38 fixes the issue. Excluding both 0x02B7AC3C and 0x2B7AC88 (the follow up instructions) does not fix the bug + + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABE4) -> has bug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AAD0) -> fixes bug + + // maybe try to place as many leave instructions as possible while keeping the bug alive + // eventually we should end up with a relatively small IR footprint that is easier to analyze + + // 0x023d5818 + // SM3DW debug + // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x23D58A8) + // { + // ppcImlGenContext.emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext.ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID); + // } + +#if 0 // TP + if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC78 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC70 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A9C0 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC3C || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AADC || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7ABE4 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABC0 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABA8 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AB90 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AB04 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7abc4 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7A9B0 || // verified + //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa10 -> fixes bug (this is after a bl) + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AA3C || // verified + //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AA44 -> fixes bug (this is on the main path, the one before, 0x02B7AA3C, does not break) + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AADC || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7ABC4 || // verified + ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7ac88 || // verified + // ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aad0 || -> fixes it + // ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa30 || -> fixes it (mostly. There was a small glitch on eponas tail?) 
+ //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa24 || -> this fixes it + //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A918 || -> this fixes it + //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7A9A0 || -> this fixes it + //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC38 || -> this fixes it + //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A8D4 || -> this fixes it + (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7AC44 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x2B7AC84) || // verified + (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x02B7AADC && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x2B7ABC0) || // verified + (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7A9B0 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02B7AA0C) || + (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x02B7AAE4 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02b7ac20) // verified + + // disabling IMLOptimizerX86_SubstituteCJumpForEflagsJump fixes it... + + //(ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7AA1C && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02B7AA40) -> fixes it + ) + { + ppcImlGenContext.emitInst().make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext.ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID); + // this doesnt work any longer because the basic blocks are determined before the recompiler is called + basicBlockInfo.GetSegmentForInstructionAppend()->SetLinkBranchTaken(nullptr); + basicBlockInfo.GetSegmentForInstructionAppend()->SetLinkBranchNotTaken(nullptr); + break; // but we should be able to just exit the block early? + } +#endif + if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext)) { debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction); From a0ad48c98163d5f68cf79c0b3db832c598d83aae Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 19 Oct 2024 02:11:26 +0200 Subject: [PATCH 45/64] PPCRec: Some fixes --- src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp | 8 ++++++-- .../Recompiler/IML/IMLRegisterAllocatorRanges.cpp | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 1d38eb3b2..709638f4b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -1000,7 +1000,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunc auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA); auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB); auto regCarry = _reg32(imlInstruction->op_r_r_r_carry.regCarry); - cemu_assert_debug(regCarry != regR && regCarry != regA); + bool carryRegIsShared = regCarry == regA || regCarry == regB; + cemu_assert_debug(regCarry != regR); // two outputs sharing the same register is undefined behavior switch (imlInstruction->operation) { @@ -1009,9 +1010,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunc std::swap(regB, regA); if (regR != regA) x64GenContext->emitter->MOV_dd(regR, regA); - x64GenContext->emitter->XOR_dd(regCarry, regCarry); + if(!carryRegIsShared) + x64GenContext->emitter->XOR_dd(regCarry, regCarry); x64GenContext->emitter->ADD_dd(regR, regB); x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); // below 
condition checks carry flag + if(carryRegIsShared) + x64GenContext->emitter->AND_di8(regCarry, 1); // clear upper bits break; case PPCREC_IML_OP_ADD_WITH_CARRY: // assumes that carry is already correctly initialized as 0 or 1 diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index e58b7888c..2c041ee35 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -133,9 +133,11 @@ void PPCRecRARange_addLink_perVirtualGPR(std::unordered_maplink_sameVirtualRegister.next = it->second; + raLivenessRange* priorFirst = it->second; + subrange->link_sameVirtualRegister.next = priorFirst; it->second = subrange; - subrange->link_sameVirtualRegister.prev = subrange; + subrange->link_sameVirtualRegister.prev = nullptr; + priorFirst->link_sameVirtualRegister.prev = subrange; } } From 8614150116244c99bd96f50fc32b23689d51416a Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 19 Oct 2024 02:35:55 +0200 Subject: [PATCH 46/64] PPCRec: Support for arbitrary function calls in the IR Used for MFTBU/MFTBL instruction --- .../Recompiler/BackendX64/BackendX64.cpp | 11 ++++ .../Recompiler/IML/IMLInstruction.cpp | 30 ++++++++++- .../Espresso/Recompiler/IML/IMLInstruction.h | 23 ++++++++ .../Recompiler/IML/IMLRegisterAllocator.cpp | 52 +++++++++++++++---- .../IML/IMLRegisterAllocatorRanges.cpp | 15 ++++++ .../Recompiler/PPCRecompilerImlGen.cpp | 22 ++++++-- 6 files changed, 140 insertions(+), 13 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 709638f4b..8140d7512 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -598,6 +598,13 @@ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here } +void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + // the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here + x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress); + x64GenContext->emitter->CALL_q(X86_REG_RAX); +} + bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { auto regR = _reg32(imlInstruction->op_r_r.regR); @@ -1574,6 +1581,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); } + else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) { // no op diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 53841bafc..4b56ff94e 100644 --- 
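For reference, the PPCRecRARange_addLink_perVirtualGPR change above is a plain head insertion into a doubly linked list keyed by virtual register: the new head's next must point at the old head, its prev must be cleared, and the old head's prev must be updated to point back at the new head. Below is a minimal standalone sketch of the corrected pointer updates; the types and names are simplified stand-ins, not the emulator's own structures.

#include <cassert>
#include <unordered_map>

// Simplified stand-ins for the recompiler types; names are illustrative only.
struct Range
{
    int virtualRegister;
    Range* prev = nullptr; // corresponds to link_sameVirtualRegister.prev
    Range* next = nullptr; // corresponds to link_sameVirtualRegister.next
};

// Insert 'subrange' at the head of the per-virtual-register list.
// The corrected version must (1) point the new head's next at the old head,
// (2) clear the new head's prev, and (3) update the old head's prev,
// otherwise a later unlink walks a node that still claims to be first.
void addLinkPerVirtualReg(std::unordered_map<int, Range*>& root, Range* subrange)
{
    auto it = root.find(subrange->virtualRegister);
    if (it == root.end())
    {
        subrange->prev = nullptr;
        subrange->next = nullptr;
        root[subrange->virtualRegister] = subrange;
        return;
    }
    Range* priorFirst = it->second;
    subrange->next = priorFirst;
    subrange->prev = nullptr;      // head has no predecessor (the old code set this to itself)
    priorFirst->prev = subrange;   // back-link that the old code never updated
    it->second = subrange;
}

int main()
{
    std::unordered_map<int, Range*> root;
    Range a{5}, b{5};
    addLinkPerVirtualReg(root, &a);
    addLinkPerVirtualReg(root, &b); // b becomes the new head
    assert(root[5] == &b && b.prev == nullptr && b.next == &a && a.prev == &b);
    return 0;
}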
a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -222,6 +222,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue; registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut; } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + if (op_call_imm.regParam0.IsValid()) + registersUsed->readGPR1 = op_call_imm.regParam0; + if (op_call_imm.regParam1.IsValid()) + registersUsed->readGPR2 = op_call_imm.regParam1; + if (op_call_imm.regParam2.IsValid()) + registersUsed->readGPR3 = op_call_imm.regParam2; + registersUsed->writtenGPR1 = op_call_imm.regReturn; + } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { // fpr load operation @@ -631,6 +641,16 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable); op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable); } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + op_call_imm.regReturn = replaceRegisterIdMultiple(op_call_imm.regReturn, translationTable); + if (op_call_imm.regParam0.IsValid()) + op_call_imm.regParam0 = replaceRegisterIdMultiple(op_call_imm.regParam0, translationTable); + if (op_call_imm.regParam1.IsValid()) + op_call_imm.regParam1 = replaceRegisterIdMultiple(op_call_imm.regParam1, translationTable); + if (op_call_imm.regParam2.IsValid()) + op_call_imm.regParam2 = replaceRegisterIdMultiple(op_call_imm.regParam2, translationTable); + } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); @@ -757,6 +777,10 @@ void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegist { ; } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + // not affected + } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); @@ -866,7 +890,11 @@ void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegist } else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - ; + // not affected + } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + // not affected } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 7594bc9f6..e7c58e8eb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -99,6 +99,7 @@ class IMLReg }; static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0); +static const IMLRegID IMLRegID_INVALID(0xFFFF); using IMLName = uint32; @@ -256,6 +257,9 @@ enum // conditional (legacy) PPCREC_IML_TYPE_CONDITIONAL_R_S32, + // function call + PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address + // FPR PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) @@ -516,6 +520,14 @@ struct IMLInstruction sint32 immS32; }op_storeLoad; struct + { + uintptr_t callAddress; + IMLReg regParam0; + IMLReg regParam1; + IMLReg regParam2; + IMLReg regReturn; + }op_call_imm; + struct { IMLReg regR; IMLReg regA; @@ -775,6 +787,17 @@ 
struct IMLInstruction this->op_atomic_compare_store.regBoolOut = regSuccessOutput; } + void make_call_imm(uintptr_t callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn) + { + this->type = PPCREC_IML_TYPE_CALL_IMM; + this->operation = 0; + this->op_call_imm.callAddress = callAddress; + this->op_call_imm.regParam0 = param0; + this->op_call_imm.regParam1 = param1; + this->op_call_imm.regParam2 = param2; + this->op_call_imm.regReturn = regReturn; + } + void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) { this->type = PPCREC_IML_TYPE_FPR_COMPARE; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 9b9ce15fa..048b316d9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -60,8 +60,8 @@ struct IMLFixedRegisters IMLReg reg; IMLPhysRegisterSet physRegSet; }; - boost::container::static_vector listInput; // fixed registers for input edge - boost::container::static_vector listOutput; // fixed registers for output edge + boost::container::small_vector listInput; // fixed registers for instruction input edge + boost::container::small_vector listOutput; // fixed registers for instruction output edge }; static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) @@ -86,7 +86,38 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); } - // todo - for volatile registers during call, we can emit a bunch of ranges that cover the output edge of the CALL instruction and use a special vGPR to indicate its not an actually mapped register + else if(instruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + // parameters (todo) + cemu_assert_debug(!instruction->op_call_imm.regParam0.IsValid()); + cemu_assert_debug(!instruction->op_call_imm.regParam1.IsValid()); + cemu_assert_debug(!instruction->op_call_imm.regParam2.IsValid()); + // return value + if(instruction->op_call_imm.regReturn.IsValid()) + { + IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat(); + bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8; + cemu_assert_debug(isIntegerFormat); // float return values are still todo + IMLPhysRegisterSet ps; + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); + fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps); + } + // block volatile registers from being used on the output edge, this makes the RegAlloc store them during the call + IMLPhysRegisterSet ps; + if(!instruction->op_call_imm.regReturn.IsValid()) + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RAX); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RCX); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RDX); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R8); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R9); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R10); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R11); + for(int i=0; i<=5; i++) + ps.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE+i); // YMM0-YMM5 are volatile + // for YMM6-YMM15 only the upper 128 bits are volatile which we dont use + 
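The CALL_IMM handling in this register-allocator hunk reserves, on the call's output edge, every register that the Microsoft x64 calling convention treats as volatile (RAX when it is not already taken by the return value, RCX, RDX, R8 through R11, and XMM0 through XMM5), so the allocator keeps live values in non-volatile registers or spills them across the call. A minimal sketch of building such a clobber mask with a plain bitset follows; the register numbering and the set type are illustrative only, not the emulator's IMLPhysRegisterSet.

#include <bitset>
#include <cassert>
#include <cstddef>

// Illustrative physical register numbering: 16 GPRs followed by 16 vector registers.
constexpr size_t GPR_BASE = 0;
constexpr size_t FPR_BASE = 16;
enum : size_t { RAX = 0, RCX = 1, RDX = 2, RBX = 3, RSP = 4, RBP = 5, RSI = 6, RDI = 7,
                R8 = 8, R9 = 9, R10 = 10, R11 = 11, R12 = 12, R13 = 13, R14 = 14, R15 = 15 };

using PhysRegSet = std::bitset<32>;

// Registers clobbered by a call under the Microsoft x64 convention:
// RAX, RCX, RDX, R8-R11 and XMM0-XMM5. If the call's return value already
// occupies RAX, that register is covered by its own fixed-register entry
// and can be left out of the clobber mask.
PhysRegSet BuildWin64CallClobberSet(bool returnValueUsesRax)
{
    PhysRegSet set;
    if (!returnValueUsesRax)
        set.set(GPR_BASE + RAX);
    for (size_t r : {RCX, RDX, R8, R9, R10, R11})
        set.set(GPR_BASE + r);
    for (size_t i = 0; i <= 5; i++)
        set.set(FPR_BASE + i); // XMM0-XMM5 are volatile
    return set;
}

int main()
{
    PhysRegSet clobbered = BuildWin64CallClobberSet(false);
    assert(clobbered.test(GPR_BASE + RAX));
    assert(!clobbered.test(GPR_BASE + RBX)); // RBX is callee-saved and survives the call
    assert(clobbered.test(FPR_BASE + 0) && !clobbered.test(FPR_BASE + 6));
    return 0;
}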
fixedRegs.listOutput.emplace_back(IMLREG_INVALID, ps); + } + } @@ -232,7 +263,7 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructi auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; for(auto& fixedRegLoc : fixedRegAccess) { - if(fixedRegLoc.reg.GetRegID() != ourRegId) + if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) { cemu_assert_debug(fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider if(fixedRegLoc.physRegSet.IsAvailable(physRegister)) @@ -487,7 +518,7 @@ std::vector IMLRA_BuildSegmentInstructionFixedReg pos = pos + 1; for(auto& fixedRegAccess : fixedRegs.listOutput) { - frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid()?fixedRegAccess.reg.GetRegID():IMLRegID_INVALID); } index++; } @@ -556,7 +587,8 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment continue; boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); - cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement + if(entry.regId != IMLRegID_INVALID) + cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers for(auto& range : overlappingRanges) { @@ -1013,7 +1045,7 @@ void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocato auto& fixedRegAccess = currentPos.IsOnInputEdge() ? 
fixedRegs.listInput : fixedRegs.listOutput; for(auto& fixedRegLoc : fixedRegAccess) { - if(fixedRegLoc.reg.GetRegID() != ourRegId) + if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet); } } @@ -1451,11 +1483,13 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); for(auto& fixedRegAccess : fixedRegs.listInput) { - AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); + if(fixedRegAccess.reg != IMLREG_INVALID) + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); } for(auto& fixedRegAccess : fixedRegs.listOutput) { - AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); + if(fixedRegAccess.reg != IMLREG_INVALID) + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); } index++; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 2c041ee35..270a133ac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -152,6 +152,20 @@ void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRa void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) { +#ifdef CEMU_DEBUG_ASSERT + raLivenessRange* cur = root.find(subrange->GetVirtualRegister())->second; + bool hasRangeFound = false; + while(cur) + { + if(cur == subrange) + { + hasRangeFound = true; + break; + } + cur = cur->link_sameVirtualRegister.next; + } + cemu_assert_debug(hasRangeFound); +#endif IMLRegID regId = subrange->GetVirtualRegister(); raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next; raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev; @@ -169,6 +183,7 @@ void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_mapsecond == subrange); root.erase(regId); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index b637b594b..ed3cfa1ee 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -6,6 +6,7 @@ #include "IML/IML.h" #include "IML/IMLRegisterAllocatorRanges.h" #include "PPCFunctionBoundaryTracker.h" +#include "Cafe/OS/libs/coreinit/coreinit_Time.h" bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); @@ -398,15 +399,30 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } -bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +ATTR_MS_ABI uint32 PPCRecompiler_GetTBL() { - printf("PPCRecompilerImlGen_MFTB(): Not supported\n"); - return false; + return (uint32)coreinit::coreinit_getTimerTick(); +} +ATTR_MS_ABI uint32 PPCRecompiler_GetTBU() +{ + return (uint32)(coreinit::coreinit_getTimerTick() >> 32); +} + +bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ uint32 rD, spr1, spr2, spr; PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); + if( spr == SPR_TBL || spr == SPR_TBU ) + { + IMLReg resultReg = _GetRegGPR(ppcImlGenContext, rD); + 
ppcImlGenContext->emitInst().make_call_imm(spr == SPR_TBL ? (uintptr_t)PPCRecompiler_GetTBL : (uintptr_t)PPCRecompiler_GetTBU, IMLREG_INVALID, IMLREG_INVALID, IMLREG_INVALID, resultReg); + return true; + } + return false; + if (spr == 268 || spr == 269) { // TBL / TBU From 97ef9524b08f67baf175d2c596a4fa231a3f6d43 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 19 Oct 2024 05:48:38 +0200 Subject: [PATCH 47/64] PPCRec: Added dump option for recompiled functions + more fixes --- .../Recompiler/IML/IMLRegisterAllocator.cpp | 45 +++++++------ .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 13 ++-- src/config/ActiveSettings.cpp | 10 +++ src/config/ActiveSettings.h | 3 + src/gui/MainWindow.cpp | 63 +++++++++---------- src/gui/MainWindow.h | 3 +- 6 files changed, 74 insertions(+), 63 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 048b316d9..0453fae3e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -265,7 +265,7 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructi { if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) { - cemu_assert_debug(fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider + cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers if(fixedRegLoc.physRegSet.IsAvailable(physRegister)) return currentPos.GetRaw() - startPosition.GetRaw(); } @@ -572,30 +572,35 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment for(size_t i=0; isecond != entry.regId; - else - vgprHasChanged = true; - lastVGPR[physReg] = entry.regId; + // we currently only handle fixed register requirements with a single register + // with one exception: When regId is IMLRegID_INVALID then the entry acts as a list of reserved registers + cemu_assert_debug(entry.regId == IMLRegID_INVALID || entry.allowedReg.HasExactlyOneAvailable()); + for(IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg+1)) + { + // check if the assigned vGPR has changed + bool vgprHasChanged = false; + auto it = lastVGPR.find(physReg); + if(it != lastVGPR.end()) + vgprHasChanged = it->second != entry.regId; + else + vgprHasChanged = true; + lastVGPR[physReg] = entry.regId; - if(!vgprHasChanged) - continue; + if(!vgprHasChanged) + continue; - boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); - if(entry.regId != IMLRegID_INVALID) - cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers + boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); + if(entry.regId != IMLRegID_INVALID) + cemu_assert_debug(!overlappingRanges.empty()); // there should always be 
at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers - for(auto& range : overlappingRanges) - { - if(range->interval2.start < entry.pos) + for(auto& range : overlappingRanges) { - PPCRecRA_splitLocalSubrange2(ppcImlGenContext, range, entry.pos, true); + if(range->interval2.start < entry.pos) + { + PPCRecRA_splitLocalSubrange2(ppcImlGenContext, range, entry.pos, true); + } } + } } // finally iterate ranges and assign fixed registers diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index db48b9c05..980c9ce05 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -396,13 +396,14 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP { return nullptr; } - - if (ppcRecFunc->ppcAddress == 0x2B7A8D4) + if (ActiveSettings::DumpRecompilerFunctionsEnabled()) { - // write code to binary file - FILE* f = fopen("ppcRecFunc_2B7A8D4.bin", "wb"); - fwrite(ppcRecFunc->x86Code, 1, ppcRecFunc->x86Size, f); - fclose(f); + FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath(fmt::format("dump/recompiler/ppc_{:08x}.bin", ppcRecFunc->ppcAddress))); + if (fs) + { + fs->writeData(ppcRecFunc->x86Code, ppcRecFunc->x86Size); + delete fs; + } } // collect list of PPC-->x64 entry points diff --git a/src/config/ActiveSettings.cpp b/src/config/ActiveSettings.cpp index 560f29868..60136e1d7 100644 --- a/src/config/ActiveSettings.cpp +++ b/src/config/ActiveSettings.cpp @@ -165,6 +165,11 @@ bool ActiveSettings::DumpTexturesEnabled() return s_dump_textures; } +bool ActiveSettings::DumpRecompilerFunctionsEnabled() +{ + return s_dump_recompiler_functions; +} + bool ActiveSettings::DumpLibcurlRequestsEnabled() { return s_dump_libcurl_requests; @@ -180,6 +185,11 @@ void ActiveSettings::EnableDumpTextures(bool state) s_dump_textures = state; } +void ActiveSettings::EnableDumpRecompilerFunctions(bool state) +{ + s_dump_recompiler_functions = state; +} + void ActiveSettings::EnableDumpLibcurlRequests(bool state) { s_dump_libcurl_requests = state; diff --git a/src/config/ActiveSettings.h b/src/config/ActiveSettings.h index e672fbee9..0d7ecfec7 100644 --- a/src/config/ActiveSettings.h +++ b/src/config/ActiveSettings.h @@ -109,9 +109,11 @@ class ActiveSettings // dump options [[nodiscard]] static bool DumpShadersEnabled(); [[nodiscard]] static bool DumpTexturesEnabled(); + [[nodiscard]] static bool DumpRecompilerFunctionsEnabled(); [[nodiscard]] static bool DumpLibcurlRequestsEnabled(); static void EnableDumpShaders(bool state); static void EnableDumpTextures(bool state); + static void EnableDumpRecompilerFunctions(bool state); static void EnableDumpLibcurlRequests(bool state); // hacks @@ -125,6 +127,7 @@ class ActiveSettings // dump options inline static bool s_dump_shaders = false; inline static bool s_dump_textures = false; + inline static bool s_dump_recompiler_functions = false; inline static bool s_dump_libcurl_requests = false; // timer speed diff --git a/src/gui/MainWindow.cpp b/src/gui/MainWindow.cpp index c83ab16b4..e38cb6177 100644 --- a/src/gui/MainWindow.cpp +++ b/src/gui/MainWindow.cpp @@ -143,6 +143,7 @@ enum // debug->dump MAINFRAME_MENU_ID_DEBUG_DUMP_TEXTURES = 21600, MAINFRAME_MENU_ID_DEBUG_DUMP_SHADERS, + MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS, MAINFRAME_MENU_ID_DEBUG_DUMP_RAM, MAINFRAME_MENU_ID_DEBUG_DUMP_FST, 
MAINFRAME_MENU_ID_DEBUG_DUMP_CURL_REQUESTS, @@ -204,8 +205,9 @@ EVT_MENU_RANGE(MAINFRAME_MENU_ID_NFC_RECENT_0 + 0, MAINFRAME_MENU_ID_NFC_RECENT_ EVT_MENU_RANGE(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + 0, MAINFRAME_MENU_ID_DEBUG_LOGGING0 + 98, MainWindow::OnDebugLoggingToggleFlagGeneric) EVT_MENU(MAINFRAME_MENU_ID_DEBUG_ADVANCED_PPC_INFO, MainWindow::OnPPCInfoToggle) // debug -> dump menu -EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_TEXTURES, MainWindow::OnDebugDumpUsedTextures) -EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_SHADERS, MainWindow::OnDebugDumpUsedShaders) +EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_TEXTURES, MainWindow::OnDebugDumpGeneric) +EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_SHADERS, MainWindow::OnDebugDumpGeneric) +EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS, MainWindow::OnDebugDumpGeneric) EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_CURL_REQUESTS, MainWindow::OnDebugSetting) // debug -> Other options EVT_MENU(MAINFRAME_MENU_ID_DEBUG_RENDER_UPSIDE_DOWN, MainWindow::OnDebugSetting) @@ -1084,45 +1086,35 @@ void MainWindow::OnPPCInfoToggle(wxCommandEvent& event) g_config.Save(); } -void MainWindow::OnDebugDumpUsedTextures(wxCommandEvent& event) +void MainWindow::OnDebugDumpGeneric(wxCommandEvent& event) { - const bool value = event.IsChecked(); - ActiveSettings::EnableDumpTextures(value); - if (value) + std::string dumpSubpath; + std::function setDumpState; + switch(event.GetId()) { - try - { - // create directory - const fs::path path(ActiveSettings::GetUserDataPath()); - fs::create_directories(path / "dump" / "textures"); - } - catch (const std::exception& ex) - { - SystemException sys(ex); - cemuLog_log(LogType::Force, "can't create texture dump folder: {}", ex.what()); - ActiveSettings::EnableDumpTextures(false); - } + case MAINFRAME_MENU_ID_DEBUG_DUMP_TEXTURES: + dumpSubpath = "dump/textures"; + setDumpState = ActiveSettings::EnableDumpTextures; + break; + case MAINFRAME_MENU_ID_DEBUG_DUMP_SHADERS: + dumpSubpath = "dump/shaders"; + setDumpState = ActiveSettings::EnableDumpShaders; + break; + case MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS: + dumpSubpath = "dump/recompiler"; + setDumpState = ActiveSettings::EnableDumpRecompilerFunctions; + break; + default: + UNREACHABLE; } -} - -void MainWindow::OnDebugDumpUsedShaders(wxCommandEvent& event) -{ const bool value = event.IsChecked(); - ActiveSettings::EnableDumpShaders(value); + setDumpState(value); if (value) { - try - { - // create directory - const fs::path path(ActiveSettings::GetUserDataPath()); - fs::create_directories(path / "dump" / "shaders"); - } - catch (const std::exception & ex) - { - SystemException sys(ex); - cemuLog_log(LogType::Force, "can't create shaders dump folder: {}", ex.what()); - ActiveSettings::EnableDumpShaders(false); - } + std::error_code ec; + auto dumpDir = ActiveSettings::GetUserDataPath(dumpSubpath); + if(!fs::exists(dumpDir, ec) && !fs::create_directories(dumpDir, ec)) + setDumpState(false); } } @@ -2233,6 +2225,7 @@ void MainWindow::RecreateMenu() wxMenu* debugDumpMenu = new wxMenu; debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_TEXTURES, _("&Textures"), wxEmptyString)->Check(ActiveSettings::DumpTexturesEnabled()); debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_SHADERS, _("&Shaders"), wxEmptyString)->Check(ActiveSettings::DumpShadersEnabled()); + debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS, _("&Recompiler functions"), wxEmptyString)->Check(ActiveSettings::DumpRecompilerFunctionsEnabled()); 
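The OnDebugDumpGeneric handler above folds two nearly identical callbacks into one table-driven path: pick a dump subdirectory and a setter based on the menu ID, enable the option, then create the directory with non-throwing filesystem calls and roll the option back on failure. A minimal standalone sketch of that pattern follows; the setter globals and the userdata path are placeholders, not the actual Cemu settings API.

#include <filesystem>
#include <functional>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

// Placeholder flags standing in for ActiveSettings::EnableDump*()
static bool s_dumpTextures = false;
static bool s_dumpShaders = false;

// Toggle a dump option and make sure its target directory exists.
// Uses the error_code overloads so a failure disables the option instead of throwing.
bool SetDumpOption(const std::string& subPath, const std::function<void(bool)>& setState, bool enabled)
{
    setState(enabled);
    if (!enabled)
        return true;
    std::error_code ec;
    fs::path dir = fs::path("userdata") / subPath; // placeholder for GetUserDataPath()
    if (!fs::exists(dir, ec) && !fs::create_directories(dir, ec))
    {
        setState(false); // roll back, mirroring the behavior of the menu handler
        return false;
    }
    return true;
}

int main()
{
    bool ok = SetDumpOption("dump/textures", [](bool v) { s_dumpTextures = v; }, true);
    std::cout << "textures dump " << ((ok && s_dumpTextures) ? "enabled" : "disabled") << "\n";
    SetDumpOption("dump/shaders", [](bool v) { s_dumpShaders = v; }, false);
    return 0;
}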
debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_CURL_REQUESTS, _("&nlibcurl HTTP/HTTPS requests"), wxEmptyString); // debug submenu wxMenu* debugMenu = new wxMenu(); diff --git a/src/gui/MainWindow.h b/src/gui/MainWindow.h index beb86f98b..ddb9795dc 100644 --- a/src/gui/MainWindow.h +++ b/src/gui/MainWindow.h @@ -107,8 +107,7 @@ class MainWindow : public wxFrame, public CafeSystem::SystemImplementation void OnDebugSetting(wxCommandEvent& event); void OnDebugLoggingToggleFlagGeneric(wxCommandEvent& event); void OnPPCInfoToggle(wxCommandEvent& event); - void OnDebugDumpUsedTextures(wxCommandEvent& event); - void OnDebugDumpUsedShaders(wxCommandEvent& event); + void OnDebugDumpGeneric(wxCommandEvent& event); void OnLoggingWindow(wxCommandEvent& event); void OnGDBStubToggle(wxCommandEvent& event); void OnDebugViewPPCThreads(wxCommandEvent& event); From aa904b6d1cf71189ad1b7d44c7fbe5d52973dc87 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 19 Oct 2024 07:28:05 +0200 Subject: [PATCH 48/64] PPCRec: Clean up code and optimize --- .../Recompiler/IML/IMLRegisterAllocator.cpp | 68 ++------ .../IML/IMLRegisterAllocatorRanges.cpp | 6 +- .../IML/IMLRegisterAllocatorRanges.h | 4 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 158 ------------------ .../Recompiler/PPCRecompilerImlGen.cpp | 69 -------- 5 files changed, 20 insertions(+), 285 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 0453fae3e..ca4280283 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -543,23 +543,31 @@ boost::container::small_vector IMLRA_GetRangeWithFixedRegRe void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border (we can later optimize this) - for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border + // todo - this can be optimized. Ranges only need to be split if there are conflicts with other segments. 
Note that below passes rely on the fact that this pass currently splits all ranges with fixed register requirements + for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;) { IMLPhysRegisterSet allowedRegs; if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + { + currentRange = currentRange->link_allSegmentRanges.next; continue; + } if(currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) { + raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next; PPCRecRA_explodeRange(ppcImlGenContext, currentRange); - // currentRange may be invalidated, therefore iterate from the beginning again (todo - can be optimized) - currentRange = imlSegment->raInfo.linkedList_allSubranges; + currentRange = nextRange; + continue; } + currentRange = currentRange->link_allSegmentRanges.next; } // second pass - look for ranges with conflicting fixed register requirements and split these too (locally) for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) { IMLPhysRegisterSet allowedRegs; + if(currentRange->list_fixedRegRequirements.empty()) + continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment if(!currentRange->GetAllowedRegistersEx(allowedRegs)) continue; if(allowedRegs.HasAnyAvailable()) @@ -607,6 +615,8 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) { IMLPhysRegisterSet allowedRegs; + if(currentRange->list_fixedRegRequirements.empty()) + continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment if(!currentRange->GetAllowedRegistersEx(allowedRegs)) { cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); @@ -1074,23 +1084,8 @@ void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocato IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet); } -void __DebugTestA(IMLSegment* imlSegment) -{ - // iterate all ranges - raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - if(!subrangeItr->list_fixedRegRequirements.empty()) - { - cemu_assert_debug(subrangeItr->HasPhysicalRegister()); - } - subrangeItr = subrangeItr->link_allSegmentRanges.next; - } -} - bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - DbgVerifyAllRanges(ctx); // sort subranges ascending by start index _sortSegmentAllSubrangesLinkedList(imlSegment); @@ -1108,7 +1103,6 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon RASpillStrategy_ExplodeRangeInter explodeRangeInter; }strategy; - sint32 dbgIndex = 0; while(subrangeItr) { raInstructionEdge currentRangeStart = subrangeItr->interval2.start; // used to be currentIndex before refactor @@ -1147,7 +1141,6 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this 
in the initial pass to keep the code here simpler) candidatePhysRegSet &= allowedRegs; - __DebugTestA(imlSegment); for (auto& liverangeItr : livenessTimeline.activeRanges) { cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0); @@ -1175,7 +1168,6 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon subrangeItr = subrangeItr->link_allSegmentRanges.next; // next continue; } - __DebugTestA(imlSegment); // there is no free register for the entire range // evaluate different strategies of splitting ranges to free up another register or shorten the current range strategy.localRangeHoleCutting.Reset(); @@ -1205,20 +1197,17 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon // evaluate strategy: Explode inter-segment ranges strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); SelectStrategyIfBetter(strategy.explodeRange); - __DebugTestA(imlSegment); } else // if subrangeItr->interval2.ExtendsIntoNextSegment() { strategy.explodeRangeInter.Reset(); strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); SelectStrategyIfBetter(strategy.explodeRangeInter); - __DebugTestA(imlSegment); } // choose strategy if(selectedStrategy) { selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr); - __DebugTestA(imlSegment); } else { @@ -1226,12 +1215,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon cemu_assert_debug(subrangeItr->interval2.ExtendsPreviousSegment()); // alternative strategy if we have no other choice: explode current range PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr); - __DebugTestA(imlSegment); } - // DEBUG BEGIN - DbgVerifyAllRanges(ctx); - dbgIndex++; - // DEBUG END return false; } return true; @@ -2131,37 +2115,15 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext ctx.raParam = &raParam; ctx.deprGenContext = ppcImlGenContext; - DbgVerifyAllRanges(ctx); // DEBUG - IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); - - DbgVerifyAllRanges(ctx); // DEBUG - ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment - - DbgVerifyAllRanges(ctx); // DEBUG ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); - IMLRA_CalculateLivenessRanges(ctx); - DbgVerifyAllRanges(ctx); // DEBUG IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx); - DbgVerifyAllRanges(ctx); // DEBUG IMLRA_AssignRegisters(ctx, ppcImlGenContext); DbgVerifyAllRanges(ctx); // DEBUG - - // debug print - //IMLDebug_Dump(ppcImlGenContext, true); - - // debug print - // if (ppcImlGenContext->debug_entryPPCAddress == 0x2BDA9F4) - // { - // IMLDebug_Dump(ppcImlGenContext, true); - // __debugbreak(); - // } - IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); IMLRA_GenerateMoveInstructions(ctx); - - PPCRecRA_deleteAllRanges(ppcImlGenContext); + IMLRA_DeleteAllRanges(ppcImlGenContext); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 270a133ac..86e0091c2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -32,10 +32,10 @@ void raLivenessRange::SetPhysicalRegisterForCluster(sint32 physicalRegister) range->physicalRegister = physicalRegister; } -boost::container::small_vector raLivenessRange::GetAllSubrangesInCluster() +boost::container::small_vector 
raLivenessRange::GetAllSubrangesInCluster() { uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); - boost::container::small_vector subranges; + boost::container::small_vector subranges; subranges.push_back(this); this->lastIterationIndex = iterationIndex; size_t i = 0; @@ -302,7 +302,7 @@ void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLive } } -void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) { for(auto& seg : ppcImlGenContext->segmentList2) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index 4467d2f01..4586bb07b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -324,7 +324,7 @@ struct raLivenessRange // register allocator result sint32 physicalRegister; - boost::container::small_vector GetAllSubrangesInCluster(); + boost::container::small_vector GetAllSubrangesInCluster(); bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false in which case allowedRegisters is left undefined IMLPhysRegisterSet GetAllowedRegisters(IMLPhysRegisterSet regPool); // return regPool with fixed register requirements filtered out @@ -339,7 +339,7 @@ struct raLivenessRange raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition); void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange); -void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); +void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 980c9ce05..84d53b266 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -212,85 +212,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } } - // DEBUG BEGIN - // if(ppcRecFunc->ppcAddress != 0x2BDA9F4) // TP - // { - // delete ppcRecFunc; - // return nullptr; - // } - // if(ppcRecFunc->ppcAddress < 0x2BDA9F4) // TP - // { - // delete ppcRecFunc; - // return nullptr; - // } - - // this prevents the crashing - // if((ppcRecFunc->ppcAddress >= 0x02ade400 && ppcRecFunc->ppcAddress < 0x02ade600)) -> no crash - //if((ppcRecFunc->ppcAddress >= 0x02ade500 && ppcRecFunc->ppcAddress < 0x02ade600)) -> no crash - // if((ppcRecFunc->ppcAddress >= 0x02ade580 && ppcRecFunc->ppcAddress < 0x02ade600)) // -> crashed around 0x0x2b874b0 (but rare? Out of 5 runs it only crashed once) - // { - // delete ppcRecFunc; - // return nullptr; - // } - // the problem with Shovel Knight is that the crash seems to be pretty instable, at least when trying to narrow it down. Lets look for another game for now - - // check TP bug... 
- // if(ppcRecFunc->ppcAddress >= 0x03000000) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02800000) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02C00000) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02A00000) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B00000) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B80000) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B40000) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B60000) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B70000) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B68000) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B64000) -> no bug (I went into wrong direction) - // if(ppcRecFunc->ppcAddress >= 0x02B6C000) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B6A000) -> has bug (double checked, it has bug) - // if(ppcRecFunc->ppcAddress >= 0x02B6B000) -> has bug (I went into the wrong direction again? Or does A000 have no bug?? - // if(ppcRecFunc->ppcAddress >= 0x02B69000) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B68800) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B68400) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B68600) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B68500) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B68580) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B685C0) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B685A0) -> has bug - // if(ppcRecFunc->ppcAddress >= 0x02B68590) -> no bug - // if(ppcRecFunc->ppcAddress >= 0x02B68598) -> has bug - - // if(ppcRecFunc->ppcAddress != 0x02B68594) -> seems fine. No bug (against the expectation) - // if(ppcRecFunc->ppcAddress == 0x02B68594) -> Still has the bug - - // if(ppcRecFunc->ppcAddress == 0x02B68594) - // { - // delete ppcRecFunc; - // return nullptr; - // } - // if(ppcRecFunc->ppcAddress >= 0x2B7A8D4 && ppcRecFunc->ppcAddress < 0x02B7AC9C && ppcRecFunc->ppcAddress != 0x2B7A8D4) - // { - // delete ppcRecFunc; - // return nullptr; - // } - // doing both of these means no bug! - // excluding just ppcAddress == 0x2B7A8D4 is enough to trigger the bug again. So it definitely that function - // next: Debug it! - - // In Pikmin 3 030a9998 is broken? 
- // if(!(ppcRecFunc->ppcAddress >= 0x030a9998 && ppcRecFunc->ppcAddress < 0x030AA208)) - // { - // delete ppcRecFunc; - // return nullptr; - // } - // else - // { - // delete ppcRecFunc; - // return nullptr; - // } - - // DEBUG END - // apply passes if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext)) @@ -299,90 +220,11 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } - // TP - // if (ppcRecFunc->ppcAddress == 0x2B7A8D4) - // { - // debug_printf("----------------------------------------\n"); - // IMLDebug_Dump(&ppcImlGenContext); - // //__debugbreak(); - // } - // // Bad Function in SM3DW - // if (ppcRecFunc->ppcAddress == 0x023D5768) - // { - // debug_printf("----------------------------------------\n"); - // IMLDebug_Dump(&ppcImlGenContext); - // } - // if (ppcRecFunc->ppcAddress >= 0x023D5768 && ppcRecFunc->ppcAddress < 0x023D58DC) - // { - // delete ppcRecFunc; - // return nullptr; - // } - // - - // - // // 0x02846c74 - // if (ppcRecFunc->ppcAddress == 0x02846c74) - // { - // debug_printf("----------------------------------------\n"); - // IMLDebug_Dump(&ppcImlGenContext); - // __debugbreak(); - // } - - // Shovel Knight - // if (ppcRecFunc->ppcAddress >= 0x02A1E630 && ppcRecFunc->ppcAddress < 0x02A1E9D8) - // { - // // debug_printf("----------------------------------------\n"); - // // IMLDebug_Dump(&ppcImlGenContext); - // // __debugbreak(); - // delete ppcRecFunc; - // return nullptr; - // } - // - // // - // if (ppcRecFunc->ppcAddress == 0x02ade5c4 || ppcRecFunc->ppcAddress == 0x02ade5c8) - // { - // // debug_printf("----------------------------------------\n"); - // IMLDebug_Dump(&ppcImlGenContext); - // __debugbreak(); - // } - - // else - // { - // delete ppcRecFunc; - // return nullptr; - // } - - //if (ppcRecFunc->ppcAddress == 0x11223344) - //{ - // //debug_printf("----------------------------------------\n"); - // //IMLDebug_Dump(&ppcImlGenContext); - // //__debugbreak(); - //} - //else - //{ - // delete ppcRecFunc; - // return nullptr; - //} - - // if (ppcRecFunc->ppcAddress >= 0x2BDA9F4 && ppcRecFunc->ppcAddress < 0x02BDAB38) - // { - // return nullptr; - // //IMLDebug_Dump(&ppcImlGenContext); - // //__debugbreak(); - // } - // if (ppcRecFunc->ppcAddress == 0x2BDA9F4) // { // IMLDebug_Dump(&ppcImlGenContext); // __debugbreak(); // } - // 31A8778 - - // if(ppcRecFunc->ppcAddress >= 0x2759E20 && ppcRecFunc->ppcAddress < 0x0275A0CC) - // { - // delete ppcRecFunc; - // return nullptr; - // } // Functions for testing (botw): // 3B4049C (large with switch case) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index ed3cfa1ee..932aac8c7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2876,75 +2876,6 @@ bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBloc uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; - // DEBUG BEGIN - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A8D4+0x10) -> stops bug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A9C0) -> has bug (optional code path) - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AA50) -> stops bug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC34) -> stops bug - // 
if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC78) -> has bug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC70) -> has bug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC88) -> has bug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC3C) -> has bug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC38) -> no bug - // weirdly, excluding 0x02B7AC38 fixes the issue. Excluding both 0x02B7AC3C and 0x2B7AC88 (the follow up instructions) does not fix the bug - - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABE4) -> has bug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AAD0) -> fixes bug - - // maybe try to place as many leave instructions as possible while keeping the bug alive - // eventually we should end up with a relatively small IR footprint that is easier to analyze - - // 0x023d5818 - // SM3DW debug - // if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x23D58A8) - // { - // ppcImlGenContext.emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext.ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID); - // } - -#if 0 // TP - if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC78 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC70 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A9C0 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC3C || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AADC || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7ABE4 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABC0 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABA8 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AB90 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AB04 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7abc4 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7A9B0 || // verified - //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa10 -> fixes bug (this is after a bl) - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AA3C || // verified - //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AA44 -> fixes bug (this is on the main path, the one before, 0x02B7AA3C, does not break) - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AADC || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7ABC4 || // verified - ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7ac88 || // verified - // ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aad0 || -> fixes it - // ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa30 || -> fixes it (mostly. There was a small glitch on eponas tail?) 
- //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa24 || -> this fixes it - //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A918 || -> this fixes it - //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7A9A0 || -> this fixes it - //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC38 || -> this fixes it - //ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A8D4 || -> this fixes it - (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7AC44 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x2B7AC84) || // verified - (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x02B7AADC && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x2B7ABC0) || // verified - (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7A9B0 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02B7AA0C) || - (ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x02B7AAE4 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02b7ac20) // verified - - // disabling IMLOptimizerX86_SubstituteCJumpForEflagsJump fixes it... - - //(ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7AA1C && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02B7AA40) -> fixes it - ) - { - ppcImlGenContext.emitInst().make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext.ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID); - // this doesnt work any longer because the basic blocks are determined before the recompiler is called - basicBlockInfo.GetSegmentForInstructionAppend()->SetLinkBranchTaken(nullptr); - basicBlockInfo.GetSegmentForInstructionAppend()->SetLinkBranchNotTaken(nullptr); - break; // but we should be able to just exit the block early? - } -#endif - if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext)) { debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction); From 002a03df3d0f2ed5365a517f85391cda8e98bf0b Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sun, 20 Oct 2024 04:25:21 +0200 Subject: [PATCH 49/64] PPCRec: Implement MCRF, rework DCBZ --- .../Recompiler/BackendX64/BackendX64.cpp | 21 ----- .../Recompiler/IML/IMLInstruction.cpp | 12 ++- .../Espresso/Recompiler/IML/IMLInstruction.h | 2 +- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 7 +- .../Recompiler/PPCRecompilerImlGen.cpp | 83 ++++++++++--------- 5 files changed, 57 insertions(+), 68 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 8140d7512..38c51fd2b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -671,27 +671,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { x64GenContext->emitter->CMP_dd(regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) - { - if( regR != regA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regR); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); - for(sint32 f=0; f<0x20; f+=8) - x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0); - } - else - { - // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, 
REG_RESV_MEMBASE); - for(sint32 f=0; f<0x20; f+=8) - x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0); - } - } else { debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 4b56ff94e..d8bcf6fdd 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -4,6 +4,15 @@ #include "../PPCRecompiler.h" #include "../PPCRecompilerIml.h" +bool IMLInstruction::HasSideEffects() const +{ + bool hasSideEffects = true; + if(type == PPCREC_IML_TYPE_R_R || type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32) + hasSideEffects = false; + // todo - add more cases + return hasSideEffects; +} + void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { registersUsed->readGPR1 = IMLREG_INVALID; @@ -26,8 +35,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_R_R) { - if (operation == PPCREC_IML_OP_DCBZ || - operation == PPCREC_IML_OP_X86_CMP) + if (operation == PPCREC_IML_OP_X86_CMP) { // both operands are read only registersUsed->readGPR1 = op_r_r.regR; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index e7c58e8eb..59a8504fa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -126,7 +126,6 @@ enum PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, - PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 // FPU PPCREC_IML_OP_FPR_ADD_BOTTOM, PPCREC_IML_OP_FPR_ADD_PAIR, @@ -818,6 +817,7 @@ struct IMLInstruction } void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; + bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. 
Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used void RewriteGPR(const std::unordered_map& translationTable); void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 2856eb247..e5bec6c0f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -649,7 +649,7 @@ void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnal // Then for each segment: // - Iterate instructions backwards // - Maintain a list of registers which are read at a later point (initially this is the list from the first step) - // - If an instruction only modifies registers which are not in the read list, then it is dead code and can be replaced with a no-op + // - If an instruction only modifies registers which are not in the read list and has no side effects, then it is dead code and can be replaced with a no-op std::unordered_set regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg); @@ -688,10 +688,7 @@ void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnal registersUsed.ForEachReadGPR([&](IMLReg reg) { regsNeeded.insert(reg.GetRegID()); }); - // for now we only allow some instruction types to be deleted, eventually we should find a safer way to identify side effects that can't be judged by register usage alone - if(imlInstruction.type != PPCREC_IML_TYPE_R_R && imlInstruction.type != PPCREC_IML_TYPE_R_R_S32 && imlInstruction.type != PPCREC_IML_TYPE_COMPARE && imlInstruction.type != PPCREC_IML_TYPE_COMPARE_S32) - continue; - if(onlyWritesRedundantRegisters) + if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters) { imlInstruction.make_no_op(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 932aac8c7..d10640f42 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -422,17 +422,22 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return true; } return false; +} - if (spr == 268 || spr == 269) +void PPCRecompilerImlGen_MCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + uint32 crD, crS, b; + PPC_OPC_TEMPL_X(opcode, crD, crS, b); + cemu_assert_debug((crD&3) == 0); + cemu_assert_debug((crS&3) == 0); + crD >>= 2; + crS >>= 2; + for (sint32 i = 0; i<4; i++) { - // TBL / TBU - uint32 param2 = spr | (rD << 16); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0, IMLREG_INVALID); - IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); - - return true; + IMLReg regCrSrcBit = _GetRegCR(ppcImlGenContext, crS * 4 + i); + IMLReg regCrDstBit = _GetRegCR(ppcImlGenContext, crD * 4 + i); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrDstBit, regCrSrcBit); } - return false; } bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1211,12 +1216,12 @@ bool PPCRecompilerImlGen_LOAD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return true; } -bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, 
bool updateAddrReg) +void PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg) { + // if rA == rD, then the EA wont be stored to rA. We could set updateAddrReg to false in such cases but the end result is the same since the loaded value would overwrite rA sint32 rA, rD, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if (updateAddrReg && (rA == 0 || rD == rB)) - return false; // invalid instruction form + updateAddrReg = updateAddrReg && (rA != 0); IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID; IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); @@ -1231,7 +1236,6 @@ bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian); else ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian); - return true; } bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg) @@ -1498,13 +1502,21 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod rA = (opcode>>16)&0x1F; rB = (opcode>>11)&0x1F; // prepare registers - IMLReg gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID; - IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // store - if( rA != 0 ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB); + IMLReg regA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + // load zero into a temporary register + IMLReg regZero = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regZero, 0); + // prepare EA and align it to cacheline + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + if(rA != 0) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); else - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterB, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, regMemResEA, ~31); + // zero out the cacheline + for(sint32 i = 0; i < 32; i += 4) + ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false); return true; } @@ -2069,6 +2081,9 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 19: // opcode category 19 switch (PPC_getBits(opcode, 30, 10)) { + case 0: + PPCRecompilerImlGen_MCRF(ppcImlGenContext, opcode); + break; case 16: // BCLR if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_LR) == false) unsupportedInstructionFound = true; @@ -2170,8 +2185,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 23: // LWZX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false); break; case 24: 
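// SLW (shift left word)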
if (PPCRecompilerImlGen_SLW(ppcImlGenContext, opcode) == false) @@ -2196,8 +2210,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) // DBCST - Generates no code break; case 55: // LWZUX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true); break; case 60: // ANDC if (!PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode)) @@ -2211,16 +2224,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) // DCBF -> No-Op break; case 87: // LBZX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false); break; case 104: if (PPCRecompilerImlGen_NEG(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; case 119: // LBZUX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true); break; case 124: // NOR if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, true)) @@ -2279,16 +2290,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 279: // LHZX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false); break; case 284: // EQV (alias to NXOR) if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, true)) unsupportedInstructionFound = true; break; case 311: // LHZUX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true); break; case 316: // XOR if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, false)) @@ -2299,16 +2308,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 343: // LHAX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false); break; case 371: if (PPCRecompilerImlGen_MFTB(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; case 375: // LHAUX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true); break; case 407: // STHX if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, false)) @@ -2342,8 +2349,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 534: // LWBRX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false); break; case 535: if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false) @@ -2397,8 +2403,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) 
unsupportedInstructionFound = true; break; case 790: // LHBRX - if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false)) - unsupportedInstructionFound = true; + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false); break; case 792: if (PPCRecompilerImlGen_SRAW(ppcImlGenContext, opcode) == false) @@ -2878,7 +2883,7 @@ bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBloc if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext)) { - debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction); + cemuLog_logDebug(LogType::Force, "PPCRecompiler: Unsupported instruction at 0x{:08x}", addressOfCurrentInstruction); ppcImlGenContext.currentOutputSegment = nullptr; return false; } From 608757dbeb7c980c9f5c09947a570b3fdcd9d025 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 23 Oct 2024 08:36:12 +0200 Subject: [PATCH 50/64] PPCRec: Fixes and optimizations + rework FRES/FRSQRTE --- .../Interpreter/PPCInterpreterFPU.cpp | 4 +- .../Interpreter/PPCInterpreterInternal.h | 4 +- .../Recompiler/BackendX64/BackendX64.cpp | 2 + .../Recompiler/BackendX64/BackendX64FPU.cpp | 12 - .../Recompiler/IML/IMLInstruction.cpp | 1 - .../Espresso/Recompiler/IML/IMLInstruction.h | 1 - .../Recompiler/IML/IMLRegisterAllocator.cpp | 635 ++++++++++-------- .../Recompiler/IML/IMLRegisterAllocator.h | 12 +- .../IML/IMLRegisterAllocatorRanges.cpp | 64 +- .../IML/IMLRegisterAllocatorRanges.h | 3 + .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 12 - .../Recompiler/PPCRecompilerImlGen.cpp | 2 +- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 12 +- 13 files changed, 409 insertions(+), 355 deletions(-) diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp index aed571d71..2c99b84ca 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp @@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] = {0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6}, }; -double frsqrte_espresso(double input) +ATTR_MS_ABI double frsqrte_espresso(double input) { unsigned long long x = *(unsigned long long*)&input; @@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] = {0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106} }; -double fres_espresso(double input) +ATTR_MS_ABI double fres_espresso(double input) { // based on testing we know that fres uses only the first 15 bits of the mantissa // seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... 
(s = sign, e = exponent, m = mantissa, x = not used) diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h index bac253c4e..896fd21cf 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h @@ -191,8 +191,8 @@ inline double roundTo25BitAccuracy(double d) return *(double*)&v; } -double fres_espresso(double input); -double frsqrte_espresso(double input); +ATTR_MS_ABI double fres_espresso(double input); +ATTR_MS_ABI double frsqrte_espresso(double input); void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 38c51fd2b..f08f0524c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -601,8 +601,10 @@ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { // the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here + x64GenContext->emitter->SUB_qi8(X86_REG_RSP, 0x28); // reserve enough space for any parameters while keeping stack alignment of 16 intact x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress); x64GenContext->emitter->CALL_q(X86_REG_RAX); + x64GenContext->emitter->ADD_qi8(X86_REG_RSP, 0x28); } bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index cff46a2d0..af19cae96 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -780,18 +780,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction // move to FPR register x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) - { - // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - - // call assembly routine to calculate accurate FRES result in XMM15 - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); - - // copy result to bottom and top half of result register - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) { // move register to XMM15 diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index d8bcf6fdd..5d90ea7fa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -363,7 +363,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP || operation == 
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED || operation == PPCREC_IML_OP_ASSIGN || - operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP || operation == PPCREC_IML_OP_FPR_NEGATE_PAIR || operation == PPCREC_IML_OP_FPR_ABS_PAIR || operation == PPCREC_IML_OP_FPR_FRES_PAIR || diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 59a8504fa..4394176b5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -143,7 +143,6 @@ enum PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half - PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index ca4280283..5309b127b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -10,9 +10,16 @@ #include #include +#include "Common/cpu_features.h" + +#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed +#define DEBUG_RA_INSTRUCTION_GEN 0 + + struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment { - IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {}; + IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) + : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {}; void TrackInstruction(sint32 index) { @@ -34,7 +41,6 @@ struct IMLRegisterAllocatorContext std::unordered_map regIdToBaseFormat; // a vector would be more efficient but it also means that reg ids have to be continuous and not completely arbitrary // first pass std::vector> perSegmentAbstractRanges; - // second pass // helper methods inline std::unordered_map& GetSegmentAbstractRangeMap(IMLSegment* imlSegment) @@ -48,78 +54,143 @@ struct IMLRegisterAllocatorContext cemu_assert_debug(it != regIdToBaseFormat.cend()); return it->second; } - }; struct IMLFixedRegisters { struct Entry { - Entry(IMLReg reg, IMLPhysRegisterSet physRegSet) : reg(reg), physRegSet(physRegSet) {} + Entry(IMLReg reg, IMLPhysRegisterSet physRegSet) + : reg(reg), physRegSet(physRegSet) {} IMLReg reg; IMLPhysRegisterSet physRegSet; }; - boost::container::small_vector listInput; // fixed registers for instruction input edge - boost::container::small_vector listOutput; // fixed registers for instruction output edge + boost::container::small_vector listInput; // fixed register requirements for instruction input edge + boost::container::small_vector listOutput; // fixed register requirements for instruction output edge }; +static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, 
IMLPhysRegisterSet volatileRegisters) +{ + sint32 numIntParams = 0, numFloatParams = 0; + + auto AddParameterMapping = [&](IMLReg reg) { + if (!reg.IsValid()) + return; + if (reg.GetBaseFormat() == IMLRegFormat::I64) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(intParamToPhysReg[numIntParams]); + fixedRegs.listInput.emplace_back(reg, ps); + numIntParams++; + } + else if (reg.GetBaseFormat() == IMLRegFormat::F64) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(floatParamToPhysReg[numFloatParams]); + fixedRegs.listInput.emplace_back(reg, ps); + numFloatParams++; + } + else + { + cemu_assert_suspicious(); + } + }; + AddParameterMapping(instruction->op_call_imm.regParam0); + AddParameterMapping(instruction->op_call_imm.regParam1); + AddParameterMapping(instruction->op_call_imm.regParam2); + // return value + if (instruction->op_call_imm.regReturn.IsValid()) + { + IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat(); + bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8; + IMLPhysRegisterSet ps; + if (isIntegerFormat) + { + ps.SetAvailable(intReturnPhysReg); + volatileRegisters.SetReserved(intReturnPhysReg); + } + else + { + ps.SetAvailable(floatReturnPhysReg); + volatileRegisters.SetReserved(floatReturnPhysReg); + } + fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps); + } + // block volatile registers from being used on the output edge, this makes the register allocator store them during the call + fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters); +} + +#if defined(__aarch64__) +// aarch64 static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) { fixedRegs.listInput.clear(); fixedRegs.listOutput.clear(); - // x86 specific logic is hardcoded for now - if(instruction->type == PPCREC_IML_TYPE_R_R_R) + // code below for aarch64 has not been tested + // The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it + // on x86 this is used for instructions like SHL reg, CL where the CL register is hardwired. On aarch it's probably only necessary for setting up the calling convention + cemu_assert_unimplemented(); +#if 0 + if (instruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2}; + const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2}; + IMLPhysRegisterSet volatileRegs; + for (int i=0; i<19; i++) // x0 to x18 are volatile + volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i); + for (int i = 0; i <= 31; i++) // which float registers are volatile?
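// (editor's note: per the AAPCS64 calling convention, v0-v7 and v16-v31 are caller-saved and only the low 64 bits of v8-v15 are callee-saved; marking all 32 FPRs as volatile, as done below, is the conservative choice)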
+ volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i); + SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs); + } +#endif +} +#else +// x86-64 +static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) +{ + fixedRegs.listInput.clear(); + fixedRegs.listOutput.clear(); + + if (instruction->type == PPCREC_IML_TYPE_R_R_R) { - if(instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + if (instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) { - // todo: We can skip this if g_CPUFeatures.x86.bmi2 is set, but for now we just assume it's not so we can properly test increased register pressure - IMLPhysRegisterSet ps; - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_ECX); - fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps); + if(!g_CPUFeatures.x86.bmi2) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX); + fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps); + } } } - else if(instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + else if (instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { IMLPhysRegisterSet ps; - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX); fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); } - else if(instruction->type == PPCREC_IML_TYPE_CALL_IMM) - { - // parameters (todo) - cemu_assert_debug(!instruction->op_call_imm.regParam0.IsValid()); - cemu_assert_debug(!instruction->op_call_imm.regParam1.IsValid()); - cemu_assert_debug(!instruction->op_call_imm.regParam2.IsValid()); - // return value - if(instruction->op_call_imm.regReturn.IsValid()) - { - IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat(); - bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8; - cemu_assert_debug(isIntegerFormat); // float return values are still todo - IMLPhysRegisterSet ps; - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); - fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps); - } - // block volatile registers from being used on the output edge, this makes the RegAlloc store them during the call - IMLPhysRegisterSet ps; - if(!instruction->op_call_imm.regReturn.IsValid()) - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RAX); - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RCX); - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RDX); - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R8); - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R9); - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R10); - ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R11); - for(int i=0; i<=5; i++) - ps.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE+i); // YMM0-YMM5 are volatile + else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8}; + const IMLPhysReg 
floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2}; + IMLPhysRegisterSet volatileRegs; + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11); + // YMM0-YMM5 are volatile + for (int i = 0; i <= 5; i++) + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + i); // for YMM6-YMM15 only the upper 128 bits are volatile which we dont use - fixedRegs.listOutput.emplace_back(IMLREG_INVALID, ps); + SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, volatileRegs); } - } - +#endif uint32 PPCRecRA_getNextIterationIndex() { @@ -138,7 +209,7 @@ bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex return false; currentSegment->raInfo.lastIterationIndex = iterationIndex; currentSegment->raInfo.isPartOfProcessedLoop = false; - + if (currentSegment->nextSegmentIsUncertain) return false; if (currentSegment->nextSegmentBranchNotTaken) @@ -188,12 +259,12 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml } } -#define SUBRANGE_LIST_SIZE (128) +#define SUBRANGE_LIST_SIZE (128) sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition) { sint32 startInstructionIndex; - if(startPosition.ConnectsToPreviousSegment()) + if (startPosition.ConnectsToPreviousSegment()) startInstructionIndex = 0; else startInstructionIndex = startPosition.GetInstructionIndex(); @@ -204,22 +275,22 @@ sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstruct sint32 preciseIndex = subrange->list_locations[i].index * 2; cemu_assert_debug(subrange->list_locations[i].isRead || subrange->list_locations[i].isWrite); // locations must have any access // check read edge - if(subrange->list_locations[i].isRead) + if (subrange->list_locations[i].isRead) { - if(preciseIndex >= startPosition.GetRaw()) + if (preciseIndex >= startPosition.GetRaw()) return preciseIndex - startPosition.GetRaw(); } // check write edge - if(subrange->list_locations[i].isWrite) + if (subrange->list_locations[i].isWrite) { preciseIndex++; - if(preciseIndex >= startPosition.GetRaw()) + if (preciseIndex >= startPosition.GetRaw()) return preciseIndex - startPosition.GetRaw(); } } } cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000); - return 10001*2; + return 10001 * 2; } // returns -1 if there is no fixed register requirement on or after startPosition @@ -227,11 +298,11 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLi { hasFixedAccess = false; cemu_assert_debug(startPosition.IsInstructionIndex()); - for(auto& fixedReqEntry : range->list_fixedRegRequirements) + for (auto& fixedReqEntry : range->list_fixedRegRequirements) { - if(fixedReqEntry.pos < startPosition) + if (fixedReqEntry.pos < startPosition) continue; - if(fixedReqEntry.allowedReg.IsAvailable(physRegister)) + if (fixedReqEntry.allowedReg.IsAvailable(physRegister)) { hasFixedAccess = true; return 
fixedReqEntry.pos.GetRaw() - startPosition.GetRaw(); @@ -249,24 +320,24 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructi raInstructionEdge endPos; endPos = startPosition + maxDistance; - if(endPos > lastPos2) + if (endPos > lastPos2) endPos = lastPos2; IMLFixedRegisters fixedRegs; - if(startPosition.IsOnOutputEdge()) - GetInstructionFixedRegisters(imlSegment->imlList.data()+startPosition.GetInstructionIndex(), fixedRegs); - for(raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos) + if (startPosition.IsOnOutputEdge()) + GetInstructionFixedRegisters(imlSegment->imlList.data() + startPosition.GetInstructionIndex(), fixedRegs); + for (raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos) { - if(currentPos.IsOnInputEdge()) + if (currentPos.IsOnInputEdge()) { - GetInstructionFixedRegisters(imlSegment->imlList.data()+currentPos.GetInstructionIndex(), fixedRegs); + GetInstructionFixedRegisters(imlSegment->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs); } auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; - for(auto& fixedRegLoc : fixedRegAccess) + for (auto& fixedRegLoc : fixedRegAccess) { - if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) + if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) { cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers - if(fixedRegLoc.physRegSet.IsAvailable(physRegister)) + if (fixedRegLoc.physRegSet.IsAvailable(physRegister)) return currentPos.GetRaw() - startPosition.GetRaw(); } } @@ -278,17 +349,17 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructi sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister) { cemu_assert_debug(startPosition.IsInstructionIndex()); - sint32 minDistance = (sint32)imlSegment->imlList.size()*2 - startPosition.GetRaw(); + sint32 minDistance = (sint32)imlSegment->imlList.size() * 2 - startPosition.GetRaw(); // next raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) + while (subrangeItr) { if (subrangeItr->GetPhysicalRegister() != physRegister) { subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } - if(subrangeItr->interval2.ContainsEdge(startPosition)) + if (subrangeItr->interval2.ContainsEdge(startPosition)) return 0; if (subrangeItr->interval2.end < startPosition) { @@ -315,32 +386,6 @@ struct IMLRALivenessTimeline activeRanges.emplace_back(subrange); } - // remove all ranges from activeRanges with end <= instructionIndex - void ExpireRanges(sint32 instructionIndex) - { - __debugbreak(); // maybe replace calls with raInstructionEdge variant? 
- expiredRanges.clear(); - size_t count = activeRanges.size(); - for (size_t f = 0; f < count; f++) - { - raLivenessRange* liverange = activeRanges[f]; - if (liverange->interval2.end.GetInstructionIndex() < instructionIndex) // <= to < since end is now inclusive - { -#ifdef CEMU_DEBUG_ASSERT - if (instructionIndex != RA_INTER_RANGE_END && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken)) - assert_dbg(); // infinite subranges should not expire -#endif - expiredRanges.emplace_back(liverange); - // remove entry - activeRanges[f] = activeRanges[count-1]; - f--; - count--; - } - } - if(count != activeRanges.size()) - activeRanges.resize(count); - } - void ExpireRanges(raInstructionEdge expireUpTo) { expiredRanges.clear(); @@ -356,29 +401,29 @@ struct IMLRALivenessTimeline #endif expiredRanges.emplace_back(liverange); // remove entry - activeRanges[f] = activeRanges[count-1]; + activeRanges[f] = activeRanges[count - 1]; f--; count--; } } - if(count != activeRanges.size()) + if (count != activeRanges.size()) activeRanges.resize(count); } std::span GetExpiredRanges() { - return { expiredRanges.data(), expiredRanges.size() }; + return {expiredRanges.data(), expiredRanges.size()}; } std::span GetActiveRanges() { - return { activeRanges.data(), activeRanges.size() }; + return {activeRanges.data(), activeRanges.size()}; } raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId) { - for(auto& it : activeRanges) - if(it->virtualRegister == regId) + for (auto& it : activeRanges) + if (it->virtualRegister == regId) return it; return nullptr; } @@ -386,15 +431,15 @@ struct IMLRALivenessTimeline raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg) { cemu_assert_debug(physReg >= 0); - for(auto& it : activeRanges) - if(it->physicalRegister == physReg) + for (auto& it : activeRanges) + if (it->physicalRegister == physReg) return it; return nullptr; } boost::container::small_vector activeRanges; -private: + private: boost::container::small_vector expiredRanges; }; @@ -406,7 +451,7 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP { IMLSegment* imlSegment = subrange->imlSegment; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) + while (subrangeItr) { if (subrange == subrangeItr) { @@ -414,7 +459,7 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } - if(subrange->interval2.IsOverlapping(subrangeItr->interval2)) + if (subrange->interval2.IsOverlapping(subrangeItr->interval2)) { if (subrangeItr->GetPhysicalRegister() >= 0) physRegSet.SetReserved(subrangeItr->GetPhysicalRegister()); @@ -425,11 +470,14 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP } } -bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->interval2.start < rhs->interval2.start; } +bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) +{ + return lhs->interval2.start < rhs->interval2.start; +} void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) { - raLivenessRange* subrangeList[4096+1]; + raLivenessRange* subrangeList[4096 + 1]; sint32 count = 0; // disassemble linked list raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; @@ -460,7 +508,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1]; } // validate list -#ifdef 
CEMU_DEBUG_ASSERT +#if DEBUG_RA_EXTRA_VALIDATION sint32 count2 = 0; subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raInstructionEdge currentStartPosition; @@ -511,14 +559,14 @@ std::vector IMLRA_BuildSegmentInstructionFixedReg GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); raInstructionEdge pos; pos.Set(index, true); - for(auto& fixedRegAccess : fixedRegs.listInput) + for (auto& fixedRegAccess : fixedRegs.listInput) { frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); } pos = pos + 1; - for(auto& fixedRegAccess : fixedRegs.listOutput) + for (auto& fixedRegAccess : fixedRegs.listOutput) { - frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid()?fixedRegAccess.reg.GetRegID():IMLRegID_INVALID); + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID); } index++; } @@ -528,14 +576,14 @@ std::vector IMLRA_BuildSegmentInstructionFixedReg boost::container::small_vector IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg) { boost::container::small_vector rangeList; - for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) { - if(!currentRange->interval2.ContainsEdge(pos)) + if (!currentRange->interval2.ContainsEdge(pos)) continue; IMLPhysRegisterSet allowedRegs; - if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) continue; - if(allowedRegs.IsAvailable(physReg)) + if (allowedRegs.IsAvailable(physReg)) rangeList.emplace_back(currentRange); } return rangeList; @@ -544,16 +592,16 @@ boost::container::small_vector IMLRA_GetRangeWithFixedRegRe void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border - // todo - this can be optimized. Ranges only need to be split if there are conflicts with other segments. Note that below passes rely on the fact that this pass currently splits all ranges with fixed register requirements - for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;) + // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. 
This isn't always necessary + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;) { IMLPhysRegisterSet allowedRegs; - if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) { currentRange = currentRange->link_allSegmentRanges.next; continue; } - if(currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) + if (currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) { raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next; PPCRecRA_explodeRange(ppcImlGenContext, currentRange); @@ -563,61 +611,60 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment currentRange = currentRange->link_allSegmentRanges.next; } // second pass - look for ranges with conflicting fixed register requirements and split these too (locally) - for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) { IMLPhysRegisterSet allowedRegs; - if(currentRange->list_fixedRegRequirements.empty()) + if (currentRange->list_fixedRegRequirements.empty()) continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment - if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) continue; - if(allowedRegs.HasAnyAvailable()) + if (allowedRegs.HasAnyAvailable()) continue; cemu_assert_unimplemented(); } // third pass - assign fixed registers, split ranges if needed std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment); std::unordered_map lastVGPR; - for(size_t i=0; i= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg+1)) + for (IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg + 1)) { // check if the assigned vGPR has changed bool vgprHasChanged = false; auto it = lastVGPR.find(physReg); - if(it != lastVGPR.end()) + if (it != lastVGPR.end()) vgprHasChanged = it->second != entry.regId; else vgprHasChanged = true; lastVGPR[physReg] = entry.regId; - if(!vgprHasChanged) + if (!vgprHasChanged) continue; boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); - if(entry.regId != IMLRegID_INVALID) + if (entry.regId != IMLRegID_INVALID) cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers - for(auto& range : overlappingRanges) + for (auto& range : overlappingRanges) { - if(range->interval2.start < entry.pos) + if (range->interval2.start < entry.pos) { PPCRecRA_splitLocalSubrange2(ppcImlGenContext, range, entry.pos, true); } } - } } // finally iterate ranges and assign fixed registers - for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = 
currentRange->link_allSegmentRanges.next) { IMLPhysRegisterSet allowedRegs; - if(currentRange->list_fixedRegRequirements.empty()) + if (currentRange->list_fixedRegRequirements.empty()) continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment - if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) { cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); continue; @@ -626,26 +673,26 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg()); } // DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned -#ifdef CEMU_DEBUG_ASSERT - for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) +#if DEBUG_RA_EXTRA_VALIDATION + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) { IMLPhysRegisterSet allowedRegs; - if(!currentRange->HasPhysicalRegister()) + if (!currentRange->HasPhysicalRegister()) continue; - for(raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next) + for (raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next) { - if(currentRange == currentRange2) + if (currentRange == currentRange2) continue; - if(currentRange->interval2.IsOverlapping(currentRange2->interval2)) + if (currentRange->interval2.IsOverlapping(currentRange2->interval2)) { cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister()); } } } - for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) { IMLPhysRegisterSet allowedRegs; - if(!currentRange->GetAllowedRegistersEx(allowedRegs)) + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) { cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); continue; @@ -661,7 +708,7 @@ void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos) { // we ignore the instruction for now and just always make it a safe split position cemu_assert_debug(pos.IsInstructionIndex()); - if(pos.IsOnOutputEdge()) + if (pos.IsOnOutputEdge()) pos = pos - 1; } @@ -672,7 +719,7 @@ void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge start cemu_assert_debug(distance >= 0); raInstructionEdge endPos = startPos + distance; IMLRA_MakeSafeSplitPosition(imlSegment, endPos); - if(endPos < startPos) + if (endPos < startPos) { distance = 0; return; @@ -684,7 +731,7 @@ void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx); class RASpillStrategy { -public: + public: virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0; sint32 GetCost() @@ -692,7 +739,7 @@ class RASpillStrategy return strategyCost; } -protected: + protected: void ResetCost() { strategyCost = INT_MAX; @@ -703,7 +750,7 @@ class 
RASpillStrategy class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy { -public: + public: void Reset() { localRangeHoleCutting.distance = -1; @@ -717,7 +764,7 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); cemu_assert_debug(localRangeHoleCutting.distance == -1); cemu_assert_debug(strategyCost == INT_MAX); - if(!currentRangeStart.ConnectsToPreviousSegment()) + if (!currentRangeStart.ConnectsToPreviousSegment()) { cemu_assert_debug(currentRangeStart.GetRaw() >= 0); for (auto candidate : timeline.activeRanges) @@ -725,11 +772,11 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy if (candidate->interval2.ExtendsIntoNextSegment()) continue; // new checks (Oct 2024): - if(candidate == currentRange) + if (candidate == currentRange) continue; - if(candidate->GetPhysicalRegister() < 0) + if (candidate->GetPhysicalRegister() < 0) continue; - if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) continue; sint32 distance2 = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart); @@ -737,7 +784,7 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy if (distance2 < 2) continue; cemu_assert_debug(currentRangeStart.IsInstructionIndex()); - distance2 = std::min(distance2, imlSegment->imlList.size()*2 - currentRangeStart.GetRaw()); // limit distance to end of segment + distance2 = std::min(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment // calculate split cost of candidate sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2); // calculate additional split cost of currentRange if hole is not large enough @@ -768,11 +815,11 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance; raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange; - if(collisionRange->interval2.start < holeStartPosition) + if (collisionRange->interval2.start < holeStartPosition) { collisionRange = PPCRecRA_splitLocalSubrange2(nullptr, collisionRange, holeStartPosition, true); cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point - cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough + cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough } else { @@ -782,31 +829,31 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy if (requiredSize2 > localRangeHoleCutting.distance) { raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true); - if(tailRange) + if (tailRange) { cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers tailRange->UnsetPhysicalRegister(); } } // verify that the hole is large enough - if(collisionRange) + if (collisionRange) { cemu_assert_debug(!collisionRange->interval2.IsOverlapping(currentRange->interval2)); } } -private: + private: struct { sint32 distance; raLivenessRange* largestHoleSubrange; - 
}localRangeHoleCutting; + } localRangeHoleCutting; }; class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy { // split current range (this is generally only a good choice when the current range is long but has few usages) - public: + public: void Reset() { ResetCost(); @@ -832,14 +879,14 @@ class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1); if (physRegItr < 0) break; - if(!allowedRegs.IsAvailable(physRegItr)) + if (!allowedRegs.IsAvailable(physRegItr)) continue; // get size of potential hole for this register sint32 distance = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, currentRangeStart, physRegItr); // some instructions may require the same register for another range, check the distance here sint32 distUntilFixedReg = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, currentRangeStart, distance, currentRange->GetVirtualRegister(), physRegItr); - if(distUntilFixedReg < distance) + if (distUntilFixedReg < distance) distance = distUntilFixedReg; IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); @@ -867,24 +914,24 @@ class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy raInstructionEdge currentRangeStart = currentRange->interval2.start; // use available register raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true); - if(tailRange) + if (tailRange) { cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers tailRange->UnsetPhysicalRegister(); } } - private: + private: struct { sint32 physRegister; sint32 distance; // size of hole - }availableRegisterHole; + } availableRegisterHole; }; class RASpillStrategy_ExplodeRange : public RASpillStrategy { -public: + public: void Reset() { ResetCost(); @@ -895,7 +942,7 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) { raInstructionEdge currentRangeStart = currentRange->interval2.start; - if(currentRangeStart.ConnectsToPreviousSegment()) + if (currentRangeStart.ConnectsToPreviousSegment()) currentRangeStart.Set(0, true); sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); cemu_assert_debug(strategyCost == INT_MAX); @@ -906,16 +953,16 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy if (!candidate->interval2.ExtendsIntoNextSegment()) continue; // new checks (Oct 2024): - if(candidate == currentRange) + if (candidate == currentRange) continue; - if(candidate->GetPhysicalRegister() < 0) + if (candidate->GetPhysicalRegister() < 0) continue; - if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) continue; sint32 distance = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart); IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); - if( distance < 2) + if (distance < 2) continue; sint32 cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); // if the hole is not large enough, add cost of splitting current subrange @@ -938,16 +985,16 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override { raInstructionEdge currentRangeStart = currentRange->interval2.start; 
- if(currentRangeStart.ConnectsToPreviousSegment()) + if (currentRangeStart.ConnectsToPreviousSegment()) currentRangeStart.Set(0, true); sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); // explode range PPCRecRA_explodeRange(nullptr, explodeRange.range); // split current subrange if necessary - if( requiredSize2 > explodeRange.distance) + if (requiredSize2 > explodeRange.distance) { - raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart+explodeRange.distance, true); - if(tailRange) + raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + explodeRange.distance, true); + if (tailRange) { cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers tailRange->UnsetPhysicalRegister(); @@ -955,19 +1002,18 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy } } -private: + private: struct { raLivenessRange* range; sint32 distance; // size of hole // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange - }explodeRange; + } explodeRange; }; - class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy { -public: + public: void Reset() { ResetCost(); @@ -980,7 +1026,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy // explode the range with the least cost cemu_assert_debug(strategyCost == INT_MAX); cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1); - for(auto candidate : timeline.activeRanges) + for (auto candidate : timeline.activeRanges) { if (!candidate->interval2.ExtendsIntoNextSegment()) continue; @@ -988,7 +1034,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange) continue; // and also filter any that dont meet fixed register requirements - if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) continue; sint32 cost; cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); @@ -1017,7 +1063,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy PPCRecRA_explodeRange(ctx, explodeRange.range); } -private: + private: struct { raLivenessRange* range; @@ -1030,37 +1076,37 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet) { IMLSegment* seg = currentRange->imlSegment; - if(seg->imlList.empty()) + if (seg->imlList.empty()) return; // there can be no fixed register requirements if there are no instructions raInstructionEdge firstPos = currentRange->interval2.start; - if(currentRange->interval2.start.ConnectsToPreviousSegment()) + if (currentRange->interval2.start.ConnectsToPreviousSegment()) firstPos.SetRaw(0); - else if(currentRange->interval2.start.ConnectsToNextSegment()) - firstPos.Set(seg->imlList.size()-1, false); + else if (currentRange->interval2.start.ConnectsToNextSegment()) + firstPos.Set(seg->imlList.size() - 1, false); raInstructionEdge lastPos = currentRange->interval2.end; - if(currentRange->interval2.end.ConnectsToPreviousSegment()) + if (currentRange->interval2.end.ConnectsToPreviousSegment()) lastPos.SetRaw(0); - else if(currentRange->interval2.end.ConnectsToNextSegment()) - 
lastPos.Set(seg->imlList.size()-1, false); + else if (currentRange->interval2.end.ConnectsToNextSegment()) + lastPos.Set(seg->imlList.size() - 1, false); cemu_assert_debug(firstPos <= lastPos); IMLRegID ourRegId = currentRange->GetVirtualRegister(); IMLFixedRegisters fixedRegs; - if(firstPos.IsOnOutputEdge()) - GetInstructionFixedRegisters(seg->imlList.data()+firstPos.GetInstructionIndex(), fixedRegs); - for(raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos) + if (firstPos.IsOnOutputEdge()) + GetInstructionFixedRegisters(seg->imlList.data() + firstPos.GetInstructionIndex(), fixedRegs); + for (raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos) { - if(currentPos.IsOnInputEdge()) + if (currentPos.IsOnInputEdge()) { - GetInstructionFixedRegisters(seg->imlList.data()+currentPos.GetInstructionIndex(), fixedRegs); + GetInstructionFixedRegisters(seg->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs); } auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; - for(auto& fixedRegLoc : fixedRegAccess) + for (auto& fixedRegLoc : fixedRegAccess) { - if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) + if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet); } } @@ -1070,13 +1116,13 @@ void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocato void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet) { cemu_assert_debug(currentRange->imlSegment == imlSegment); - if(currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) + if (currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) { auto clusterRanges = currentRange->GetAllSubrangesInCluster(); - for(auto& rangeIt : clusterRanges) + for (auto& rangeIt : clusterRanges) { IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, rangeIt, candidatePhysRegSet); - if(!candidatePhysRegSet.HasAnyAvailable()) + if (!candidatePhysRegSet.HasAnyAvailable()) break; } return; @@ -1101,22 +1147,20 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon RASpillStrategy_ExplodeRange explodeRange; // for ranges that connect to follow up segments: RASpillStrategy_ExplodeRangeInter explodeRangeInter; - }strategy; + } strategy; - while(subrangeItr) + while (subrangeItr) { raInstructionEdge currentRangeStart = subrangeItr->interval2.start; // used to be currentIndex before refactor PPCRecRA_debugValidateSubrange(subrangeItr); - // below used to be: std::min(currentIndex, RA_INTER_RANGE_END-1) livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges - // note: The logic here is complicated in regards to whether the instruction index should be inclusive or exclusive. Find a way to simplify? 
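// (in broad strokes this loop behaves like a linear-scan allocator: ranges are visited ordered by their start edge, ranges that ended before the current start are expired out of the active set, and the current range either receives a free physical register or one of the spill/split strategies is applied further below)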
// if subrange already has register assigned then add it to the active list and continue if (subrangeItr->GetPhysicalRegister() >= 0) { // verify if register is actually available -#ifdef CEMU_DEBUG_ASSERT +#if DEBUG_RA_EXTRA_VALIDATION for (auto& liverangeItr : livenessTimeline.activeRanges) { // check for register mismatch @@ -1128,7 +1172,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon continue; } // ranges with fixed register requirements should already have a phys register assigned - if(!subrangeItr->list_fixedRegRequirements.empty()) + if (!subrangeItr->list_fixedRegRequirements.empty()) { cemu_assert_debug(subrangeItr->HasPhysicalRegister()); } @@ -1148,7 +1192,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon } // check intersections with other ranges and determine allowed registers IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments) - if(candidatePhysRegSet.HasAnyAvailable()) + if (candidatePhysRegSet.HasAnyAvailable()) { // check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments) PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet); @@ -1160,7 +1204,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon { IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet); } - if(candidatePhysRegSet.HasAnyAvailable()) + if (candidatePhysRegSet.HasAnyAvailable()) { // use free register subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg()); @@ -1176,11 +1220,10 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon // cant assign register // there might be registers available, we just can't use them due to range conflicts RASpillStrategy* selectedStrategy = nullptr; - auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) - { - if(newStrategy.GetCost() == INT_MAX) + auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) { + if (newStrategy.GetCost() == INT_MAX) return; - if(selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost()) + if (selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost()) selectedStrategy = &newStrategy; }; @@ -1205,7 +1248,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon SelectStrategyIfBetter(strategy.explodeRangeInter); } // choose strategy - if(selectedStrategy) + if (selectedStrategy) { selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr); } @@ -1335,7 +1378,7 @@ void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same #endif } - }); + }); instructionIndex++; } } @@ -1366,14 +1409,12 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx } abstractRange->isProcessed = true; // create subrange -#ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr); -#endif cemu_assert_debug( - (abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) || - abstractRange->usageStart < abstractRange->usageEnd); // 
usageEnd is exclusive so it should always be larger + (abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) || + abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger sint32 inclusiveEnd = abstractRange->usageEnd; - if(inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END) + if (inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END) inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive raInterval interval; interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true); @@ -1408,22 +1449,12 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx for (auto& it : imlSegment->list_prevSegments) { IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR); - if(!prevRange) + if (!prevRange) continue; if (prevRange->usageEnd == RA_INTER_RANGE_END) PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name); } } - // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction - // this is due to range load instructions being inserted before the suffix instruction - // todo - currently later steps might break this assumption, look into this - // if (subrange->interval2.ExtendsIntoNextSegment()) - // { - // if (imlSegment->HasSuffixInstruction()) - // { - // cemu_assert_debug(subrange->interval2.start.GetInstructionIndex() <= imlSegment->GetSuffixInstructionIndex()); - // } - // } return subrange; } @@ -1432,14 +1463,13 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML { const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); - auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) - { + auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) { raLivenessRange* subrange = regToSubrange.find(regId)->second; cemu_assert_debug(subrange); raFixedRegRequirement tmp; tmp.pos.Set(instructionIndex, isInput); tmp.allowedReg = physRegSet; - if(subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos) + if (subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos) subrange->list_fixedRegRequirements.push_back(tmp); }; @@ -1447,7 +1477,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); for (auto& it : segMap) { - if(it.second.isProcessed) + if (it.second.isProcessed) continue; IMLRegID regId = it.first; PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second); @@ -1466,18 +1496,18 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); cemu_assert_debug(!subrange->interval2.start.IsInstructionIndex() || subrange->interval2.start.GetInstructionIndex() <= index); cemu_assert_debug(!subrange->interval2.end.IsInstructionIndex() || subrange->interval2.end.GetInstructionIndex() >= index); - }); + }); // check fixed register requirements IMLFixedRegisters fixedRegs; 
GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); - for(auto& fixedRegAccess : fixedRegs.listInput) + for (auto& fixedRegAccess : fixedRegs.listInput) { - if(fixedRegAccess.reg != IMLREG_INVALID) + if (fixedRegAccess.reg != IMLREG_INVALID) AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); } - for(auto& fixedRegAccess : fixedRegs.listOutput) + for (auto& fixedRegAccess : fixedRegs.listOutput) { - if(fixedRegAccess.reg != IMLREG_INVALID) + if (fixedRegAccess.reg != IMLREG_INVALID) AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); } index++; @@ -1491,7 +1521,7 @@ void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, I if (it == segDistMap.end()) { sint32 startIndex; - if(imlSegment->HasSuffixInstruction()) + if (imlSegment->HasSuffixInstruction()) startIndex = imlSegment->GetSuffixInstructionIndex(); else startIndex = RA_INTER_RANGE_END; @@ -1621,11 +1651,15 @@ void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSeg std::vector list_segments; std::vector list_processedSegment; size_t segmentCount = ctx.deprGenContext->segmentList2.size(); - list_segments.reserve(segmentCount+1); + list_segments.reserve(segmentCount + 1); list_processedSegment.resize(segmentCount); - auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {list_processedSegment[seg->momentaryIndex] = true; }; - auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool { return list_processedSegment[seg->momentaryIndex]; }; + auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) { + list_processedSegment[seg->momentaryIndex] = true; + }; + auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool { + return list_processedSegment[seg->momentaryIndex]; + }; markSegProcessed(imlSegment); sint32 index = 0; @@ -1681,11 +1715,11 @@ void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx) if (hasLoopExit == false) continue; - // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) + // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); for (auto& it : segMap) { - if(it.second.usageEnd != RA_INTER_RANGE_END) + if (it.second.usageEnd != RA_INTER_RANGE_END) continue; if (imlSegment->nextSegmentBranchTaken) IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, it.first); @@ -1730,10 +1764,8 @@ void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange) subrange->_noLoad = true; } - struct subrangeEndingInfo_t { - //boost::container::small_vector subrangeList2; raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE]; sint32 subrangeCount; @@ -1813,7 +1845,7 @@ static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange) for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) { raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i]; - if( subrangeItr->hasStore ) + if (subrangeItr->hasStore) continue; // this ending already stores, no extra cost alreadyStoredInAllEndings = false; sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment); @@ -1842,20 +1874,20 @@ void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) { // this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore 
// track read/write dependencies per segment - for(auto& seg : ppcImlGenContext->segmentList2) + for (auto& seg : ppcImlGenContext->segmentList2) { raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; - while(subrange) + while (subrange) { IMLRA_AnalyzeSubrangeDataDependency(subrange); subrange = subrange->link_allSegmentRanges.next; } } // propagate information across segment boundaries - for(auto& seg : ppcImlGenContext->segmentList2) + for (auto& seg : ppcImlGenContext->segmentList2) { raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; - while(subrange) + while (subrange) { IMLRA_AnalyzeRangeDataFlow(subrange); subrange = subrange->link_allSegmentRanges.next; @@ -1870,8 +1902,6 @@ inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) return IMLReg(baseFormat, baseFormat, 0, regId); } -#define DEBUG_RA_INSTRUCTION_GEN 0 - // prepass for IMLRA_GenerateSegmentMoveInstructions which updates all virtual registers to their physical counterparts void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { @@ -1879,11 +1909,11 @@ void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSeg boost::container::small_vector activeRanges; raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; raInstructionEdge currentEdge; - for(size_t i=0; iimlList.size(); i++) + for (size_t i = 0; i < imlSegment->imlList.size(); i++) { currentEdge.Set(i, false); // set to instruction index on output edge // activate ranges which begin before or during this instruction - while(currentRange && currentRange->interval2.start <= currentEdge) + while (currentRange && currentRange->interval2.start <= currentEdge) { cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict @@ -1895,9 +1925,9 @@ void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSeg imlSegment->imlList[i].RewriteGPR(virtId2PhysReg); // deactivate ranges which end during this instruction auto it = activeRanges.begin(); - while(it != activeRanges.end()) + while (it != activeRanges.end()) { - if((*it)->interval2.end <= currentEdge) + if ((*it)->interval2.end <= currentEdge) { virtId2PhysReg.erase((*it)->GetVirtualRegister()); it = activeRanges.erase(it); @@ -1923,22 +1953,22 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM std::vector rebuiltInstructions; sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 
1 : 0); - if(imlSegment->imlList.empty()) + if (imlSegment->imlList.empty()) { // empty segments need special handling (todo - look into merging this with the core logic below eventually) // store all ranges raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; - while(currentRange) + while (currentRange) { - if(currentRange->hasStore) + if (currentRange->hasStore) rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister())); currentRange = currentRange->link_allSegmentRanges.next; } // load ranges currentRange = imlSegment->raInfo.linkedList_allSubranges; - while(currentRange) + while (currentRange) { - if(!currentRange->_noLoad) + if (!currentRange->_noLoad) { cemu_assert_debug(currentRange->interval2.ExtendsIntoNextSegment()); rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); @@ -1953,14 +1983,14 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM { raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; raInstructionEdge edge; - if(imlSegment->HasSuffixInstruction()) + if (imlSegment->HasSuffixInstruction()) edge.Set(numInstructionsWithoutSuffix, true); else - edge.Set(numInstructionsWithoutSuffix-1, false); + edge.Set(numInstructionsWithoutSuffix - 1, false); - while(currentRange) + while (currentRange) { - if(!currentRange->interval2.IsNextSegmentOnly() && currentRange->interval2.end > edge) + if (!currentRange->interval2.IsNextSegmentOnly() && currentRange->interval2.end > edge) { currentRange->interval2.SetEnd(edge); } @@ -1973,7 +2003,7 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM cemuLog_log(LogType::Force, "--- Intermediate liveness info ---"); { raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges; - while(dbgRange) + while (dbgRange) { cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval2.start.GetDebugString(), dbgRange->interval2.end.GetDebugString()); dbgRange = dbgRange->link_allSegmentRanges.next; @@ -1986,7 +2016,7 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; // make all ranges active that start on RA_INTER_RANGE_START - while(currentRange && currentRange->interval2.start.ConnectsToPreviousSegment()) + while (currentRange && currentRange->interval2.start.ConnectsToPreviousSegment()) { activeRanges.push_back(currentRange); currentRange = currentRange->link_allSegmentRanges.next; @@ -1995,12 +2025,12 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM auto it = activeRanges.begin(); raInstructionEdge firstOutputEdge; firstOutputEdge.Set(0, false); - while(it != activeRanges.end()) + while (it != activeRanges.end()) { - if( (*it)->interval2.end < firstOutputEdge) + if ((*it)->interval2.end < firstOutputEdge) { raLivenessRange* storedRange = *it; - if(storedRange->hasStore) + if (storedRange->hasStore) rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister())); it = activeRanges.erase(it); continue; @@ -2009,14 +2039,14 @@ void 
IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM } sint32 numInstructions = (sint32)imlSegment->imlList.size(); - for(sint32 i=0; iinterval2.start <= curEdge) + curEdge.SetRaw(i * 2 + 1); // +1 to include ranges that start at the output of the instruction + while (currentRange && currentRange->interval2.start <= curEdge) { - if(!currentRange->_noLoad) + if (!currentRange->_noLoad) { rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); } @@ -2026,20 +2056,19 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM // copy instruction rebuiltInstructions.push_back(imlSegment->imlList[i]); // output edge - curEdge.SetRaw(i*2+1+1); + curEdge.SetRaw(i * 2 + 1 + 1); // also store ranges that end on the next input edge, we handle this by adding an extra 1 above auto it = activeRanges.begin(); - while(it != activeRanges.end()) + while (it != activeRanges.end()) { - if( (*it)->interval2.end <= curEdge) + if ((*it)->interval2.end <= curEdge) { // range expires // we cant erase it from virtId2PhysReg right away because a store might happen before the last use (the +1 thing above) - // todo - check hasStore raLivenessRange* storedRange = *it; - if(storedRange->hasStore) + if (storedRange->hasStore) { cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister())); @@ -2053,21 +2082,21 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM } // if there is no suffix instruction we currently need to handle the final loads here cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); - if(imlSegment->HasSuffixInstruction()) + if (imlSegment->HasSuffixInstruction()) { cemu_assert_debug(!currentRange); // currentRange should be NULL? 
- for(auto& remainingRange : activeRanges) + for (auto& remainingRange : activeRanges) { cemu_assert_debug(!remainingRange->hasStore); } } else { - for(auto& remainingRange : activeRanges) + for (auto& remainingRange : activeRanges) { cemu_assert_debug(!remainingRange->hasStore); // this range still needs to be stored } - while(currentRange) + while (currentRange) { cemu_assert_debug(currentRange->interval2.IsNextSegmentOnly()); cemu_assert_debug(!currentRange->_noLoad); @@ -2095,18 +2124,50 @@ void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx) } } -void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx) +static void DbgVerifyFixedRegRequirements(IMLSegment* imlSegment) { +#if DEBUG_RA_EXTRA_VALIDATION + std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment); + for(auto& fixedReq : frr) + { + for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next) + { + if (!range->interval2.ContainsEdge(fixedReq.pos)) + continue; + // verify if the requirement is compatible + if(range->GetVirtualRegister() == fixedReq.regId) + { + cemu_assert(range->HasPhysicalRegister()); + cemu_assert(fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register matches, but not assigned the right physical register + } + else + { + cemu_assert(!fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register does not match, but using the reserved physical register + } + } + } +#endif +} + +static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx) +{ +#if DEBUG_RA_EXTRA_VALIDATION for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) + while (subrangeItr) { PPCRecRA_debugValidateSubrange(subrangeItr); subrangeItr = subrangeItr->link_allSegmentRanges.next; } } + // check that no range validates register requirements + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + DbgVerifyFixedRegRequirements(ctx.deprGenContext->segmentList2[s]); + } +#endif } void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam) @@ -2121,7 +2182,7 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext IMLRA_CalculateLivenessRanges(ctx); IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx); IMLRA_AssignRegisters(ctx, ppcImlGenContext); - DbgVerifyAllRanges(ctx); // DEBUG + DbgVerifyAllRanges(ctx); IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); IMLRA_GenerateMoveInstructions(ctx); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index 9e5573a6c..b5a7610b2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -17,9 +17,19 @@ class IMLPhysRegisterSet m_regBitmask &= ~((uint64)1 << index); } + void SetAllAvailable() + { + m_regBitmask = ~0ull; + } + + bool HasAllAvailable() const + { + return m_regBitmask == ~0ull; + } + bool IsAvailable(uint32 index) const { - return (m_regBitmask & (1 << index)) != 0; + return (m_regBitmask & ((uint64)1 << index)) != 0; } IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 86e0091c2..45d01608b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -67,38 +67,30 @@ boost::container::small_vector raLivenessRange::GetAllSub return subranges; } +void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs) +{ + range->lastIterationIndex = iterationIndex; + for (auto& it : range->list_fixedRegRequirements) + allowedRegs &= it.allowedReg; + // check successors + if (range->subrangeBranchTaken && range->subrangeBranchTaken->lastIterationIndex != iterationIndex) + GetAllowedRegistersExRecursive(range->subrangeBranchTaken, iterationIndex, allowedRegs); + if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->lastIterationIndex != iterationIndex) + GetAllowedRegistersExRecursive(range->subrangeBranchNotTaken, iterationIndex, allowedRegs); + // check predecessors + for (auto& prev : range->previousRanges) + { + if (prev->lastIterationIndex != iterationIndex) + GetAllowedRegistersExRecursive(prev, iterationIndex, allowedRegs); + } +}; + bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters) { - if(interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment()) - { - auto clusterRanges = GetAllSubrangesInCluster(); - bool hasAnyRequirement = false; - for(auto& subrange : clusterRanges) - { - if(subrange->list_fixedRegRequirements.empty()) - continue; - allowedRegisters = subrange->list_fixedRegRequirements.front().allowedReg; - hasAnyRequirement = true; - break; - } - if(!hasAnyRequirement) - return false; - for(auto& subrange : clusterRanges) - { - for(auto& fixedRegLoc : subrange->list_fixedRegRequirements) - allowedRegisters &= fixedRegLoc.allowedReg; - } - } - else - { - // local check only, slightly faster - if(list_fixedRegRequirements.empty()) - return false; - allowedRegisters = list_fixedRegRequirements.front().allowedReg; - for(auto& fixedRegLoc : list_fixedRegRequirements) - allowedRegisters &= fixedRegLoc.allowedReg; - } - return true; + uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); + allowedRegisters.SetAllAvailable(); + GetAllowedRegistersExRecursive(this, iterationIndex, allowedRegisters); + return !allowedRegisters.HasAllAvailable(); } IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool) @@ -424,6 +416,14 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range) cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx()); cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx()); } + // validate fixed reg requirements + if (!range->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval2.start); + cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval2.end); + for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++) + cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos); + } } #else @@ -563,7 +563,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++) { raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i; - if 
(tailInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex())) + if (tailInterval.ContainsEdge(fixedReg->pos)) { tailSubrange->list_fixedRegRequirements.push_back(*fixedReg); } @@ -572,7 +572,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++) { raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i; - if (!headInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex())) + if (!headInterval.ContainsEdge(fixedReg->pos)) { subrange->list_fixedRegRequirements.resize(i); break; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index 4586bb07b..4d928a26b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -335,6 +335,9 @@ struct raLivenessRange void SetPhysicalRegister(sint32 physicalRegister); void SetPhysicalRegisterForCluster(sint32 physicalRegister); void UnsetPhysicalRegister() { physicalRegister = -1; } + + private: + void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs); }; raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 84d53b266..1ad411904 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -181,9 +181,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP } } - // if(range.startAddress < 0x0202fa3C || range.startAddress > 0x0202FA7C) - // return nullptr; // DEBUG - PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t(); ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; @@ -340,15 +337,6 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); - - // if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8 && ppcImlGenContext.debug_entryPPCAddress < 0x0240C0AC) - // { - // IMLDebug_Dump(&ppcImlGenContext); - // __debugbreak(); - // } - // else if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8) - // return false; - return true; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index d10640f42..928bbc90d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1513,7 +1513,7 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); else ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, regMemResEA, ~31); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regMemResEA, regMemResEA, ~31); // zero out the cacheline for(sint32 i = 0; i < 32; i += 4) ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false); diff --git 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index ffee73eac..96a7b560c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -4,6 +4,9 @@ #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" +ATTR_MS_ABI double frsqrte_espresso(double input); +ATTR_MS_ABI double fres_espresso(double input); + IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) @@ -1007,9 +1010,12 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load registers IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + // copy result to top + if( ppcImlGenContext->PSE ) + PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); return true; } @@ -1026,9 +1032,7 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod } PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegisterD); if( ppcImlGenContext->PSE ) - { PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } return true; } @@ -1075,7 +1079,7 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op // hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr); IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); return true; From f1fa4949d29f7e095e10d9720f894d3cddaca041 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 23 Oct 2024 08:36:37 +0200 Subject: [PATCH 51/64] Add natvis file for boost::container::small_vector --- boost.natvis | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 boost.natvis diff --git a/boost.natvis b/boost.natvis new file mode 100644 index 000000000..cee3e3d54 --- /dev/null +++ b/boost.natvis @@ -0,0 +1,14 @@ + + + + + + m_holder.m_size + + m_holder.m_size + m_holder.m_start + + + + + From e34a27399941c36df63e53f34a76a53248d314c4 Mon Sep 17 
00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 23 Oct 2024 18:49:15 +0200 Subject: [PATCH 52/64] PPCRec: Optimize register allocation --- .../Recompiler/IML/IMLRegisterAllocator.cpp | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 5309b127b..4ce1ffd5f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -596,6 +596,11 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;) { IMLPhysRegisterSet allowedRegs; + if(currentRange->list_fixedRegRequirements.empty()) + { + currentRange = currentRange->link_allSegmentRanges.next; + continue; // since we run this pass for every segment we dont need to do global checks here for clusters which may not even have fixed register requirements + } if (!currentRange->GetAllowedRegistersEx(allowedRegs)) { currentRange = currentRange->link_allSegmentRanges.next; @@ -689,16 +694,6 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment } } } - for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) - { - IMLPhysRegisterSet allowedRegs; - if (!currentRange->GetAllowedRegistersEx(allowedRegs)) - { - cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); - continue; - } - cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister())); - } #endif } @@ -1275,6 +1270,22 @@ void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* // assign fixed registers first for (IMLSegment* segIt : ppcImlGenContext->segmentList2) IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt); +#if DEBUG_RA_EXTRA_VALIDATION + // fixed registers are currently handled per-segment, but here we validate that they are assigned correctly on a global scope as well + for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2) + { + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) + { + cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); + continue; + } + cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister())); + } + } +#endif while (true) { From 5949e620c5685d79f60b43726950d74713fc35de Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 25 Oct 2024 08:51:22 +0200 Subject: [PATCH 53/64] PPCRec: Reintroduce optimization for BDNZ loops --- .../Recompiler/PPCRecompilerImlGen.cpp | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 928bbc90d..cf25128b6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2930,6 +2930,30 @@ void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenCo 
cemu_assert_debug(ppcImlGenContext.currentBasicBlock->lastAddress == ppcImlGenContext.ppcAddressOfCurrentInstruction); } +bool PPCRecompiler_IsBasicBlockATightFiniteLoop(IMLSegment* imlSegment, PPCBasicBlockInfo& basicBlockInfo) +{ + // if we detect a finite loop we can skip generating the cycle check + // currently we only check for BDNZ loops since thats reasonably safe to rely on + // however there are other forms of loops that can be classified as finite, + // but detecting those involves analyzing PPC code and we dont have the infrastructure for that (e.g. IML has CheckRegisterUsage but we dont have an equivalent for PPC code) + + // base criteria, must jump to beginning of same segment + if (imlSegment->nextSegmentBranchTaken != imlSegment) + return false; + + uint32 opcode = *(uint32be*)(memory_base + basicBlockInfo.lastAddress); + if (Espresso::GetPrimaryOpcode(opcode) != Espresso::PrimaryOpcode::BC) + return false; + uint32 BO, BI, BD; + PPC_OPC_TEMPL_B(opcode, BO, BI, BD); + Espresso::BOField boField(BO); + if(!boField.conditionIgnore() || boField.branchAlways()) + return false; + if(boField.decrementerIgnore()) + return false; + return true; +} + void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) { IMLSegment* imlSegment = basicBlockInfo.GetFirstSegmentInChain(); @@ -2938,8 +2962,7 @@ void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, P if (basicBlockInfo.branchTarget > basicBlockInfo.startAddress) return; - // exclude non-infinite tight loops - if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) + if (PPCRecompiler_IsBasicBlockATightFiniteLoop(imlSegment, basicBlockInfo)) return; // make the segment enterable so execution can return after passing a check From 70c99fd626243fd1d30b0da35d7fc65136cbcc87 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:17:04 +0200 Subject: [PATCH 54/64] PPCRec: Use 32bit mov for 32bit operations --- .../HW/Espresso/Recompiler/BackendX64/BackendX64.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index f08f0524c..cced18c4f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -1100,13 +1100,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction { uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; if(regR != regA) - x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); x64Gen_add_reg64Low32_imm32(x64GenContext, regR, (uint32)immU32); } else if (imlInstruction->operation == PPCREC_IML_OP_SUB) { if (regR != regA) - x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); x64Gen_sub_reg64Low32_imm32(x64GenContext, regR, immS32); } else if (imlInstruction->operation == PPCREC_IML_OP_AND || @@ -1114,7 +1114,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_XOR) { if (regR != regA) - x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); if (imlInstruction->operation == PPCREC_IML_OP_AND) x64Gen_and_reg64Low32_imm32(x64GenContext, regR, immS32); else if (imlInstruction->operation == 
PPCREC_IML_OP_OR) @@ -1131,7 +1131,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction uint32 sh = (vImm>>16)&0xFF; uint32 mask = ppc_mask(mb, me); // copy rS to temporary register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regA); // rotate destination register if( sh ) x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); @@ -1148,7 +1148,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize if( regR != regA ) - x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP); } else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || @@ -1156,7 +1156,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { if( regA != regR ) - x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) From 96d7c754f9266532d73ac4f86a9d7b82bb53ec72 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 25 Oct 2024 19:00:11 +0200 Subject: [PATCH 55/64] PPCRec: Update spill cost calculation --- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 3 - .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 50 ------------ .../Recompiler/IML/IMLRegisterAllocator.cpp | 21 +++-- .../IML/IMLRegisterAllocatorRanges.cpp | 79 ++++++++++++++++--- .../IML/IMLRegisterAllocatorRanges.h | 40 ++++++---- 5 files changed, 103 insertions(+), 90 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 98c48a849..bc0c27c54 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -3,9 +3,6 @@ #include "IMLInstruction.h" #include "IMLSegment.h" -// analyzer -bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment); - // optimizer passes void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index 77403e1b8..6ae4b5916 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -3,53 +3,3 @@ #include "util/helpers/fixedSizeList.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" - -/* - * Analyzes a single segment and returns true if it is a finite loop - */ -bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) -{ - return false; // !!! DISABLED !!! 
- - bool isTightFiniteLoop = false; - // base criteria, must jump to beginning of same segment - if (imlSegment->nextSegmentBranchTaken != imlSegment) - return false; - // loops using BDNZ are assumed to always be finite - for(const IMLInstruction& instIt : imlSegment->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB) - { - return true; - } - } - // for non-BDNZ loops, check for common patterns - // risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB) - // this catches most loops with load-update and store-update instructions, but also those with decrementing counters - FixedSizeList list_modifiedRegisters; - for (const IMLInstruction& instIt : imlSegment->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB) ) - { - list_modifiedRegisters.addUnique(instIt.op_r_immS32.regR); - } - } - if (list_modifiedRegisters.count > 0) - { - // remove all registers from the list that are modified by non-ADD/SUB instructions - // todo: We should also cover the case where ADD+SUB on the same register cancel the effect out - IMLUsedRegisters registersUsed; - for (const IMLInstruction& instIt : imlSegment->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB)) - continue; - instIt.CheckRegisterUsage(®istersUsed); - registersUsed.ForEachWrittenGPR([&](IMLReg r) { list_modifiedRegisters.remove(r); }); - } - if (list_modifiedRegisters.count > 0) - { - return true; - } - } - return false; -} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 4ce1ffd5f..b75c389ca 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -15,7 +15,6 @@ #define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed #define DEBUG_RA_INSTRUCTION_GEN 0 - struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment { IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) @@ -38,7 +37,7 @@ struct IMLRegisterAllocatorContext IMLRegisterAllocatorParameters* raParam; ppcImlGenContext_t* deprGenContext; // deprecated. 
Try to decouple IMLRA from other parts of IML/PPCRec - std::unordered_map regIdToBaseFormat; // a vector would be more efficient but it also means that reg ids have to be continuous and not completely arbitrary + std::unordered_map regIdToBaseFormat; // first pass std::vector> perSegmentAbstractRanges; @@ -781,11 +780,11 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy cemu_assert_debug(currentRangeStart.IsInstructionIndex()); distance2 = std::min(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment // calculate split cost of candidate - sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2); + sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2); // calculate additional split cost of currentRange if hole is not large enough if (distance2 < requiredSize2) { - cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2); + cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2); // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes) cost += (requiredSize2 - distance2) / 10; } @@ -889,7 +888,7 @@ class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy continue; // calculate additional cost due to split cemu_assert_debug(distance < requiredSize2); // should always be true otherwise previous step would have selected this register? - sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); + sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); // add small additional cost for the remaining range (prefer larger holes) cost += ((requiredSize2 - distance) / 2) / 10; if (cost < strategyCost) @@ -959,11 +958,11 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); if (distance < 2) continue; - sint32 cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); + sint32 cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate); // if the hole is not large enough, add cost of splitting current subrange if (distance < requiredSize2) { - cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); + cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); // add small additional cost for the remaining range (prefer larger holes) cost += ((requiredSize2 - distance) / 2) / 10; } @@ -1032,7 +1031,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) continue; sint32 cost; - cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); + cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate); // compare with current best candidate for this strategy if (cost < strategyCost) { @@ -1043,7 +1042,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy } // add current range as a candidate too sint32 ownCost; - ownCost = PPCRecRARange_estimateCostAfterRangeExplode(currentRange); + ownCost = IMLRA_CalculateAdditionalCostOfRangeExplode(currentRange); if (ownCost < strategyCost) { strategyCost = ownCost; @@ -1859,7 +1858,7 @@ static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange) if (subrangeItr->hasStore) continue; // this ending 
already stores, no extra cost alreadyStoredInAllEndings = false; - sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment); + sint32 storeCost = IMLRA_GetSegmentReadWriteCost(subrangeItr->imlSegment); delayStoreCost = std::max(storeCost, delayStoreCost); } if (alreadyStoredInAllEndings) @@ -1867,7 +1866,7 @@ static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange) subrange->hasStore = false; subrange->hasStoreDelayed = true; } - else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment)) + else if (delayStoreCost <= IMLRA_GetSegmentReadWriteCost(subrange->imlSegment)) { subrange->hasStore = false; subrange->hasStoreDelayed = true; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 45d01608b..2f4581ee4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -642,7 +642,7 @@ void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 inde subrange->list_locations.emplace_back(index, isRead, isWrite); } -sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment) +sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment) { sint32 v = imlSegment->loopDepth + 1; v *= 5; @@ -668,13 +668,13 @@ sint32 PPCRecRARange_estimateTotalCost(std::span ranges) if (!subrange->interval2.ExtendsPreviousSegment()) { //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); - mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment)); + mostExpensiveRead = std::max(mostExpensiveRead, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment)); readCount++; } if (!subrange->interval2.ExtendsIntoNextSegment()) { //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); - mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment)); + mostExpensiveWrite = std::max(mostExpensiveWrite, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment)); writeCount++; } } @@ -683,21 +683,34 @@ sint32 PPCRecRARange_estimateTotalCost(std::span ranges) return cost; } -// calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it -sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange) +// calculate additional cost of range that it would have after calling _ExplodeRange() on it +sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange) { auto ranges = subrange->GetAllSubrangesInCluster(); - sint32 cost = -PPCRecRARange_estimateTotalCost(ranges); + sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges); for (auto& subrange : ranges) { if (subrange->list_locations.empty()) - continue; - cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // we assume a read and a store + continue; // this range would be deleted and thus has no cost + sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); + bool hasAdditionalLoad = subrange->interval2.ExtendsPreviousSegment(); + bool hasAdditionalStore = subrange->interval2.ExtendsIntoNextSegment(); + if(hasAdditionalLoad && !subrange->list_locations.front().isRead && subrange->list_locations.front().isWrite) // if written before read, then a load isn't necessary + { + cost += segmentLoadStoreCost; + } + if(hasAdditionalStore) + { + bool hasWrite = std::find_if(subrange->list_locations.begin(), subrange->list_locations.end(), [](const 
raLivenessLocation_t& loc) { return loc.isWrite; }) != subrange->list_locations.end(); + if(!hasWrite) // ranges which don't modify their value do not need to be stored + cost += segmentLoadStoreCost; + } } + // todo - properly calculating all the data-flow dependency based costs is more complex so this currently is an approximation return cost; } -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition) +sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition) { // validation #ifdef CEMU_DEBUG_ASSERT @@ -719,9 +732,53 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, if (splitInstructionIndex > subrange->list_locations.back().index) return 0; - // todo - determine exact cost of split subranges + // this can be optimized, but we should change list_locations to track instruction edges instead of instruction indices + std::vector headLocations; + std::vector tailLocations; + for (auto& location : subrange->list_locations) + { + if(location.GetReadPos() < splitPosition || location.GetWritePos() < splitPosition) + headLocations.push_back(location); + if(location.GetReadPos() >= splitPosition || location.GetWritePos() >= splitPosition) + tailLocations.push_back(location); + } + // fixup locations + if(!headLocations.empty() && headLocations.back().GetWritePos() >= splitPosition) + { + headLocations.back().isWrite = false; + if(!headLocations.back().isRead && !headLocations.back().isWrite) + headLocations.pop_back(); + } + if(!tailLocations.empty() && tailLocations.front().GetReadPos() < splitPosition) + { + tailLocations.front().isRead = false; + if(!tailLocations.front().isRead && !tailLocations.front().isWrite) + tailLocations.erase(tailLocations.begin()); + } + + // based on + sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); + + auto CalculateCostFromLocationRange = [segmentLoadStoreCost](const std::vector& locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32 + { + if(locations.empty()) + return 0; + sint32 cost = 0; + if(locations.front().isRead && trackLoadCost) + cost += segmentLoadStoreCost; // not overwritten, so there is a load cost + bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != locations.end(); + if(hasWrite && trackStoreCost) + cost += segmentLoadStoreCost; // modified, so there is a store cost + return cost; + }; + + sint32 baseCost = CalculateCostFromLocationRange(subrange->list_locations); + + bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().isRead && tailLocations.front().isWrite; - cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // currently we assume that the additional region will require a read and a store + sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true); + cemu_assert_debug(newCost >= baseCost); + cost = newCost - baseCost; return cost; } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index 4d928a26b..5173031e4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -1,18 +1,6 @@ #pragma once #include "IMLRegisterAllocator.h" -struct 
raLivenessLocation_t -{ - sint32 index; - bool isRead; - bool isWrite; - - raLivenessLocation_t() = default; - - raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) - : index(index), isRead(isRead), isWrite(isWrite) {}; -}; - struct raLivenessSubrangeLink { struct raLivenessRange* prev; @@ -167,6 +155,28 @@ struct raInstructionEdge }; +struct raLivenessLocation_t +{ + sint32 index; + bool isRead; + bool isWrite; + + raLivenessLocation_t() = default; + + raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) + : index(index), isRead(isRead), isWrite(isWrite) {}; + + raInstructionEdge GetReadPos() + { + return raInstructionEdge(index, true); + } + + raInstructionEdge GetWritePos() + { + return raInstructionEdge(index, false); + } +}; + struct raInterval { raInterval() @@ -354,7 +364,7 @@ void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 inde void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); // cost estimation -sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment); -sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange); +sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment); +sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange); //sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex); -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition); \ No newline at end of file +sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition); \ No newline at end of file From 636b63fda212235a6f1bc4090975a528b725f621 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 26 Oct 2024 12:33:23 +0200 Subject: [PATCH 56/64] PPCRec: Refactor read/write access tracking for liveness ranges --- .../Espresso/Recompiler/IML/IMLInstruction.h | 21 ++ .../Recompiler/IML/IMLRegisterAllocator.cpp | 77 +++-- .../IML/IMLRegisterAllocatorRanges.cpp | 287 +++++------------- .../IML/IMLRegisterAllocatorRanges.h | 24 +- 4 files changed, 144 insertions(+), 265 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 4394176b5..677da5c2d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -408,6 +408,27 @@ struct IMLUsedRegisters F(readGPR3); } + // temporary (for FPRs) + template + void ForEachWrittenFPR(Fn F) const + { + if (writtenFPR1.IsValid()) + F(writtenFPR1); + } + + template + void ForEachReadFPR(Fn F) const + { + if (readFPR1.IsValid()) + F(readFPR1); + if (readFPR2.IsValid()) + F(readFPR2); + if (readFPR3.IsValid()) + F(readFPR3); + if (readFPR4.IsValid()) + F(readFPR4); + } + template void ForEachAccessedGPR(Fn F) const { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index b75c389ca..c7764fa3e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -168,7 +168,8 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe { IMLPhysRegisterSet ps; ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX); - fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); + fixedRegs.listInput.emplace_back(IMLREG_INVALID, ps); // none of the 
inputs may use EAX + fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); // but we output to EAX } else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM) { @@ -262,30 +263,14 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition) { - sint32 startInstructionIndex; - if (startPosition.ConnectsToPreviousSegment()) - startInstructionIndex = 0; - else - startInstructionIndex = startPosition.GetInstructionIndex(); - for (sint32 i = 0; i < subrange->list_locations.size(); i++) + for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++) { - if (subrange->list_locations[i].index >= startInstructionIndex) + if (subrange->list_accessLocations[i].pos >= startPosition) { - sint32 preciseIndex = subrange->list_locations[i].index * 2; - cemu_assert_debug(subrange->list_locations[i].isRead || subrange->list_locations[i].isWrite); // locations must have any access - // check read edge - if (subrange->list_locations[i].isRead) - { - if (preciseIndex >= startPosition.GetRaw()) - return preciseIndex - startPosition.GetRaw(); - } - // check write edge - if (subrange->list_locations[i].isWrite) - { - preciseIndex++; - if (preciseIndex >= startPosition.GetRaw()) - return preciseIndex - startPosition.GetRaw(); - } + auto& it = subrange->list_accessLocations[i]; + cemu_assert_debug(it.IsRead() != it.IsWrite()); // an access location can be either read or write + cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment()); + return it.pos.GetRaw() - startPosition.GetRaw(); } } cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000); @@ -549,9 +534,7 @@ struct raFixedRegRequirementWithVGPR std::vector IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment) { std::vector frrList; - size_t index = 0; - IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { IMLFixedRegisters fixedRegs; @@ -560,7 +543,7 @@ std::vector IMLRA_BuildSegmentInstructionFixedReg pos.Set(index, true); for (auto& fixedRegAccess : fixedRegs.listInput) { - frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? 
fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID); } pos = pos + 1; for (auto& fixedRegAccess : fixedRegs.listOutput) @@ -1468,6 +1451,19 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx return subrange; } +void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos) +{ + if (subrange->list_accessLocations.empty()) + { + subrange->list_accessLocations.emplace_back(pos); + return; + } + if(subrange->list_accessLocations.back().pos == pos) + return; + cemu_assert_debug(subrange->list_accessLocations.back().pos < pos); + subrange->list_accessLocations.emplace_back(pos); +} + // take abstract range data and create LivenessRanges void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { @@ -1500,12 +1496,27 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML while (index < imlSegment->imlList.size()) { imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { + raInstructionEdge pos((sint32)index, true); + gprTracking.ForEachReadGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + raLivenessRange* subrange = regToSubrange.find(gprId)->second; + IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); + }); + gprTracking.ForEachReadFPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + raLivenessRange* subrange = regToSubrange.find(gprId)->second; + IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); + }); + pos = {(sint32)index, false}; + gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + raLivenessRange* subrange = regToSubrange.find(gprId)->second; + IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); + }); + gprTracking.ForEachWrittenFPR([&](IMLReg gprReg) { IMLRegID gprId = gprReg.GetRegID(); raLivenessRange* subrange = regToSubrange.find(gprId)->second; - PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); - cemu_assert_debug(!subrange->interval2.start.IsInstructionIndex() || subrange->interval2.start.GetInstructionIndex() <= index); - cemu_assert_debug(!subrange->interval2.end.IsInstructionIndex() || subrange->interval2.end.GetInstructionIndex() >= index); + IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); }); // check fixed register requirements IMLFixedRegisters fixedRegs; @@ -1754,13 +1765,13 @@ void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange) bool isRead = false; bool isWritten = false; bool isOverwritten = false; - for (auto& location : subrange->list_locations) + for (auto& location : subrange->list_accessLocations) { - if (location.isRead) + if (location.IsRead()) { isRead = true; } - if (location.isWrite) + if (location.IsWrite()) { if (isRead == false) isOverwritten = true; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 2f4581ee4..2c930651b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -207,7 +207,7 @@ raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, { raLivenessRange* range = memPool_livenessSubrange.acquireObj(); range->previousRanges.clear(); - range->list_locations.clear(); + range->list_accessLocations.clear(); range->list_fixedRegRequirements.clear(); range->imlSegment = imlSegment; @@ -259,39 +259,16 @@ void 
_unlinkSubrange(raLivenessRange* subrange) void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { _unlinkSubrange(subrange); - //subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); - subrange->list_locations.clear(); - - //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start); - //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end); + subrange->list_accessLocations.clear(); + subrange->list_fixedRegRequirements.clear(); memPool_livenessSubrange.releaseObj(subrange); } -// leaves range and linked ranges in invalid state. Only use at final clean up when no range is going to be accessed anymore -void _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) -{ - _unlinkSubrange(subrange); - //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start); - //PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end); - memPool_livenessSubrange.releaseObj(subrange); - -// #ifdef CEMU_DEBUG_ASSERT -// // DEBUG BEGIN -// subrange->lastIterationIndex = 0xFFFFFFFE; -// subrange->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1; -// subrange->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1; -// -// // DEBUG END -// #endif -} - void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { auto clusterRanges = subrange->GetAllSubrangesInCluster(); for (auto& subrange : clusterRanges) - { - _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, subrange); - } + PPCRecRA_deleteSubrange(ppcImlGenContext, subrange); } void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) @@ -300,9 +277,7 @@ void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) { raLivenessRange* cur; while(cur = seg->raInfo.linkedList_allSubranges) - { - _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, cur); - } + PPCRecRA_deleteSubrange(ppcImlGenContext, cur); seg->raInfo.linkedList_allSubranges = nullptr; seg->raInfo.linkedList_perVirtualRegister.clear(); } @@ -322,7 +297,6 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan if (subrange == absorbedSubrange) assert_dbg(); #endif - // update references subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken; @@ -334,22 +308,9 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan *std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange; // merge usage locations - // at the merge point both ranges might track the same instruction, we handle this by first merging this duplicate location - if(subrange && absorbedSubrange && !subrange->list_locations.empty() && !absorbedSubrange->list_locations.empty()) - { - if(subrange->list_locations.back().index == absorbedSubrange->list_locations.front().index) - { - subrange->list_locations.back().isRead |= absorbedSubrange->list_locations.front().isRead; - subrange->list_locations.back().isWrite |= absorbedSubrange->list_locations.front().isWrite; - absorbedSubrange->list_locations.erase(absorbedSubrange->list_locations.begin()); // inefficient - } - } - for (auto& location : absorbedSubrange->list_locations) - { - cemu_assert_debug(subrange->list_locations.empty() || (subrange->list_locations.back().index < location.index)); // todo - sometimes 
a subrange can contain the same instruction at the merge point if they are covering half of the instruction edge - subrange->list_locations.push_back(location); - } - absorbedSubrange->list_locations.clear(); + for (auto& accessLoc : absorbedSubrange->list_accessLocations) + subrange->list_accessLocations.push_back(accessLoc); + absorbedSubrange->list_accessLocations.clear(); // merge fixed reg locations #ifdef CEMU_DEBUG_ASSERT if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty()) @@ -358,9 +319,8 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan } #endif for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements) - { subrange->list_fixedRegRequirements.push_back(fixedReg); - } + absorbedSubrange->list_fixedRegRequirements.clear(); subrange->interval2.end = absorbedSubrange->interval2.end; @@ -376,18 +336,29 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange auto clusterRanges = originRange->GetAllSubrangesInCluster(); for (auto& subrange : clusterRanges) { - if (subrange->list_locations.empty()) + if (subrange->list_accessLocations.empty()) continue; raInterval interval; - interval.SetInterval(subrange->list_locations.front().index, true, subrange->list_locations.back().index, true); + interval.SetInterval(subrange->list_accessLocations.front().pos, subrange->list_accessLocations.back().pos); raLivenessRange* newSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end); // copy locations and fixed reg indices - newSubrange->list_locations = subrange->list_locations; + newSubrange->list_accessLocations = subrange->list_accessLocations; newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements; if(originRange->HasPhysicalRegister()) { cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement } + // validate + if(!newSubrange->list_accessLocations.empty()) + { + cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval2.start); + cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval2.end); + } + if(!newSubrange->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval2.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters() + cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval2.end); + } } // remove subranges PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange); @@ -411,10 +382,10 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range) cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment()); } // validate locations - if (!range->list_locations.empty()) + if (!range->list_accessLocations.empty()) { - cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx()); - cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx()); + cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval2.start); + cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval2.end); } // validate fixed reg requirements if (!range->list_fixedRegRequirements.empty()) @@ -430,41 
+401,11 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range) void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {} #endif -// since locations are per-instruction, but intervals are per-edge, it's possible that locations track reads/writes outside of the range -// this function will remove any outside read/write locations -void IMLRA_FixLocations(raLivenessRange* range) -{ - if(range->list_locations.empty()) - return; - if(range->interval2.start.IsInstructionIndex() && range->interval2.start.GetInstructionIndex() == range->list_locations.front().index) - { - auto& location = range->list_locations.front(); - if(range->interval2.start.IsOnOutputEdge()) - { - location.isRead = false; - if(!location.isRead && !location.isWrite) - range->list_locations.erase(range->list_locations.begin()); - } - } - if(range->list_locations.empty()) - return; - if(range->interval2.end.IsInstructionIndex() && range->interval2.end.GetInstructionIndex() == range->list_locations.back().index) - { - auto& location = range->list_locations.back(); - if(range->interval2.end.IsOnInputEdge()) - { - location.isWrite = false; - if(!location.isRead && !location.isWrite) - range->list_locations.pop_back(); - } - } -} - // trim start and end of range to match first and last read/write locations // does not trim start/endpoints which extend into the next/previous segment void IMLRA_TrimRangeToUse(raLivenessRange* range) { - if(range->list_locations.empty()) + if(range->list_accessLocations.empty()) { // special case where we trim ranges extending from other segments to a single instruction edge cemu_assert_debug(!range->interval2.start.IsInstructionIndex() || !range->interval2.end.IsInstructionIndex()); @@ -474,25 +415,18 @@ void IMLRA_TrimRangeToUse(raLivenessRange* range) range->interval2.end = range->interval2.start; return; } + // trim start and end raInterval prevInterval = range->interval2; - // trim start if(range->interval2.start.IsInstructionIndex()) - { - bool isInputEdge = range->list_locations.front().isRead; - range->interval2.start.Set(range->list_locations.front().index, isInputEdge); - } - // trim end + range->interval2.start = range->list_accessLocations.front().pos; if(range->interval2.end.IsInstructionIndex()) - { - bool isOutputEdge = range->list_locations.back().isWrite; - range->interval2.end.Set(range->list_locations.back().index, !isOutputEdge); - } + range->interval2.end = range->list_accessLocations.back().pos; // extra checks #ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(range->interval2.start <= range->interval2.end); - for(auto& loc : range->list_locations) + for(auto& loc : range->list_accessLocations) { - cemu_assert_debug(range->interval2.ContainsInstructionIndex(loc.index)); + cemu_assert_debug(range->interval2.ContainsEdge(loc.pos)); } cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2)); #endif @@ -532,33 +466,25 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte *std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange; // we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in case #ifdef CEMU_DEBUG_ASSERT - if(!subrange->list_locations.empty()) + if(subrange->list_accessLocations.size() > 1) { - sint32 curIdx = -1; - for(auto& location : subrange->list_locations) + for(size_t i=0; ilist_accessLocations.size()-1; i++) { - cemu_assert_debug(curIdx < 
location.index); - curIdx = location.index; + cemu_assert_debug(subrange->list_accessLocations[i].pos < subrange->list_accessLocations[i+1].pos); } } #endif // split locations - // since there are 2 edges per instruction and locations track both via a single index, locations on the split point might need to be copied into both ranges - for (auto& location : subrange->list_locations) - { - if(tailInterval.ContainsInstructionIndex(location.index)) - tailSubrange->list_locations.push_back(location); - } - // remove tail locations from head - for (sint32 i = 0; i < subrange->list_locations.size(); i++) - { - raLivenessLocation_t* location = subrange->list_locations.data() + i; - if (!headInterval.ContainsInstructionIndex(location->index)) - { - subrange->list_locations.resize(i); - break; - } - } + auto it = std::lower_bound( + subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), splitPosition, + [](const raAccessLocation& accessLoc, raInstructionEdge value) { return accessLoc.pos < value; } + ); + size_t originalCount = subrange->list_accessLocations.size(); + tailSubrange->list_accessLocations.insert(tailSubrange->list_accessLocations.end(), it, subrange->list_accessLocations.end()); + subrange->list_accessLocations.erase(it, subrange->list_accessLocations.end()); + cemu_assert_debug(subrange->list_accessLocations.empty() || subrange->list_accessLocations.back().pos < splitPosition); + cemu_assert_debug(tailSubrange->list_accessLocations.empty() || tailSubrange->list_accessLocations.front().pos >= splitPosition); + cemu_assert_debug(subrange->list_accessLocations.size() + tailSubrange->list_accessLocations.size() == originalCount); // split fixed reg requirements for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++) { @@ -581,15 +507,10 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte // adjust intervals subrange->interval2 = headInterval; tailSubrange->interval2 = tailInterval; - // fix locations to only include read/write edges within the range - if(subrange) - IMLRA_FixLocations(subrange); - if(tailSubrange) - IMLRA_FixLocations(tailSubrange); // trim to hole if(trimToHole) { - if(subrange->list_locations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex())) + if(subrange->list_accessLocations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex())) { PPCRecRA_deleteSubrange(ppcImlGenContext, subrange); subrange = nullptr; @@ -598,7 +519,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte { IMLRA_TrimRangeToUse(subrange); } - if(tailSubrange->list_locations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex())) + if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex())) { PPCRecRA_deleteSubrange(ppcImlGenContext, tailSubrange); tailSubrange = nullptr; @@ -622,26 +543,6 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte return tailSubrange; } -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite) -{ - if (subrange->list_locations.empty()) - { - subrange->list_locations.emplace_back(index, isRead, isWrite); - return; - } - raLivenessLocation_t* lastLocation = subrange->list_locations.data() + (subrange->list_locations.size() - 1); - 
cemu_assert_debug(lastLocation->index <= index); - if (lastLocation->index == index) - { - // update - lastLocation->isRead = lastLocation->isRead || isRead; - lastLocation->isWrite = lastLocation->isWrite || isWrite; - return; - } - // add new - subrange->list_locations.emplace_back(index, isRead, isWrite); -} - sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment) { sint32 v = imlSegment->loopDepth + 1; @@ -649,40 +550,6 @@ sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment) return v*v; // 25, 100, 225, 400 } -// calculate cost of entire range cluster -sint32 PPCRecRARange_estimateTotalCost(std::span ranges) -{ - sint32 cost = 0; - - // todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code). - - // currently we calculate the cost based on the most expensive entry/exit point - - sint32 mostExpensiveRead = 0; - sint32 mostExpensiveWrite = 0; - sint32 readCount = 0; - sint32 writeCount = 0; - - for (auto& subrange : ranges) - { - if (!subrange->interval2.ExtendsPreviousSegment()) - { - //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); - mostExpensiveRead = std::max(mostExpensiveRead, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment)); - readCount++; - } - if (!subrange->interval2.ExtendsIntoNextSegment()) - { - //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); - mostExpensiveWrite = std::max(mostExpensiveWrite, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment)); - writeCount++; - } - } - cost = mostExpensiveRead + mostExpensiveWrite; - cost = cost + (readCount + writeCount) / 10; - return cost; -} - // calculate additional cost of range that it would have after calling _ExplodeRange() on it sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange) { @@ -690,18 +557,19 @@ sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange) sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges); for (auto& subrange : ranges) { - if (subrange->list_locations.empty()) + if (subrange->list_accessLocations.empty()) continue; // this range would be deleted and thus has no cost sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); bool hasAdditionalLoad = subrange->interval2.ExtendsPreviousSegment(); bool hasAdditionalStore = subrange->interval2.ExtendsIntoNextSegment(); - if(hasAdditionalLoad && !subrange->list_locations.front().isRead && subrange->list_locations.front().isWrite) // if written before read, then a load isn't necessary + if(hasAdditionalLoad && subrange->list_accessLocations.front().IsWrite()) // if written before read then a load isn't necessary { + cemu_assert_debug(!subrange->list_accessLocations.front().IsRead()); cost += segmentLoadStoreCost; } if(hasAdditionalStore) { - bool hasWrite = std::find_if(subrange->list_locations.begin(), subrange->list_locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != subrange->list_locations.end(); + bool hasWrite = std::find_if(subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != subrange->list_accessLocations.end(); if(!hasWrite) // ranges which don't modify their value do not need to be stored cost += segmentLoadStoreCost; } @@ -721,60 +589,45 @@ sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInst sint32 cost = 0; // find split position in location list - if 
(subrange->list_locations.empty()) - { - assert_dbg(); // should not happen? + if (subrange->list_accessLocations.empty()) return 0; - } - sint32 splitInstructionIndex = splitPosition.GetInstructionIndex(); - if (splitInstructionIndex <= subrange->list_locations.front().index) + if (splitPosition <= subrange->list_accessLocations.front().pos) return 0; - if (splitInstructionIndex > subrange->list_locations.back().index) + if (splitPosition > subrange->list_accessLocations.back().pos) return 0; - // this can be optimized, but we should change list_locations to track instruction edges instead of instruction indices - std::vector headLocations; - std::vector tailLocations; - for (auto& location : subrange->list_locations) - { - if(location.GetReadPos() < splitPosition || location.GetWritePos() < splitPosition) - headLocations.push_back(location); - if(location.GetReadPos() >= splitPosition || location.GetWritePos() >= splitPosition) - tailLocations.push_back(location); - } - // fixup locations - if(!headLocations.empty() && headLocations.back().GetWritePos() >= splitPosition) + size_t firstTailLocationIndex = 0; + for (size_t i = 0; i < subrange->list_accessLocations.size(); i++) { - headLocations.back().isWrite = false; - if(!headLocations.back().isRead && !headLocations.back().isWrite) - headLocations.pop_back(); - } - if(!tailLocations.empty() && tailLocations.front().GetReadPos() < splitPosition) - { - tailLocations.front().isRead = false; - if(!tailLocations.front().isRead && !tailLocations.front().isWrite) - tailLocations.erase(tailLocations.begin()); + if (subrange->list_accessLocations[i].pos >= splitPosition) + { + firstTailLocationIndex = i; + break; + } } + std::span headLocations{subrange->list_accessLocations.data(), firstTailLocationIndex}; + std::span tailLocations{subrange->list_accessLocations.data() + firstTailLocationIndex, subrange->list_accessLocations.size() - firstTailLocationIndex}; + cemu_assert_debug(headLocations.empty() || headLocations.back().pos < splitPosition); + cemu_assert_debug(tailLocations.empty() || tailLocations.front().pos >= splitPosition); - // based on sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); - auto CalculateCostFromLocationRange = [segmentLoadStoreCost](const std::vector& locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32 + auto CalculateCostFromLocationRange = [segmentLoadStoreCost](std::span locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32 { if(locations.empty()) return 0; sint32 cost = 0; - if(locations.front().isRead && trackLoadCost) + if(locations.front().IsRead() && trackLoadCost) cost += segmentLoadStoreCost; // not overwritten, so there is a load cost - bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != locations.end(); + bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != locations.end(); if(hasWrite && trackStoreCost) cost += segmentLoadStoreCost; // modified, so there is a store cost return cost; }; - sint32 baseCost = CalculateCostFromLocationRange(subrange->list_locations); + sint32 baseCost = CalculateCostFromLocationRange(subrange->list_accessLocations); - bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().isRead && tailLocations.front().isWrite; + bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().IsRead() && tailLocations.front().IsWrite(); 
sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true); cemu_assert_debug(newCost >= baseCost); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index 5173031e4..bc78a1155 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -155,26 +155,21 @@ struct raInstructionEdge }; -struct raLivenessLocation_t +struct raAccessLocation { - sint32 index; - bool isRead; - bool isWrite; + raAccessLocation(raInstructionEdge pos) : pos(pos) {} - raLivenessLocation_t() = default; - - raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) - : index(index), isRead(isRead), isWrite(isWrite) {}; - - raInstructionEdge GetReadPos() + bool IsRead() const { - return raInstructionEdge(index, true); + return pos.IsOnInputEdge(); } - raInstructionEdge GetWritePos() + bool IsWrite() const { - return raInstructionEdge(index, false); + return pos.IsOnOutputEdge(); } + + raInstructionEdge pos; }; struct raInterval @@ -321,7 +316,7 @@ struct raLivenessRange // processing uint32 lastIterationIndex; // instruction read/write locations - std::vector list_locations; + std::vector list_accessLocations; // ordered list of all raInstructionEdge indices which require a fixed register std::vector list_fixedRegRequirements; // linked list (subranges with same GPR virtual register) @@ -360,7 +355,6 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole = false); -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite); void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); // cost estimation From 126a682143f2b407bd356ccc5600d68682c339c8 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 26 Oct 2024 18:27:10 +0200 Subject: [PATCH 57/64] PPCRec: Clean up some outdated code --- .../Recompiler/IML/IMLInstruction.cpp | 74 +++++++++---------- .../Espresso/Recompiler/IML/IMLInstruction.h | 62 ++-------------- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 10 +-- .../Recompiler/IML/IMLRegisterAllocator.cpp | 10 --- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 3 - 5 files changed, 45 insertions(+), 114 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 5d90ea7fa..63714cdb3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -18,13 +18,9 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = IMLREG_INVALID; registersUsed->readGPR2 = IMLREG_INVALID; registersUsed->readGPR3 = IMLREG_INVALID; + registersUsed->readGPR4 = IMLREG_INVALID; registersUsed->writtenGPR1 = IMLREG_INVALID; registersUsed->writtenGPR2 = IMLREG_INVALID; - registersUsed->readFPR1 = IMLREG_INVALID; - registersUsed->readFPR2 = IMLREG_INVALID; - registersUsed->readFPR3 = IMLREG_INVALID; - registersUsed->readFPR4 = IMLREG_INVALID; - registersUsed->writtenFPR1 = IMLREG_INVALID; if (type == PPCREC_IML_TYPE_R_NAME) { registersUsed->writtenGPR1 = op_r_name.regR; @@ -243,7 +239,7 @@ void 
IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_LOAD) { // fpr load operation - registersUsed->writtenFPR1 = op_storeLoad.registerData; + registersUsed->writtenGPR1 = op_storeLoad.registerData; // address is in gpr register if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; @@ -257,8 +253,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same - registersUsed->readFPR4 = op_storeLoad.registerData; cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); + registersUsed->readGPR2 = op_storeLoad.registerData; break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: @@ -280,7 +276,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { // fpr load operation - registersUsed->writtenFPR1 = op_storeLoad.registerData; + registersUsed->writtenGPR1 = op_storeLoad.registerData; // address is in gpr registers if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; @@ -297,7 +293,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - registersUsed->readFPR4 = op_storeLoad.registerData; + registersUsed->readGPR3 = op_storeLoad.registerData; break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: @@ -318,16 +314,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_STORE) { // fpr store operation - registersUsed->readFPR1 = op_storeLoad.registerData; + registersUsed->readGPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem.IsValid()) - registersUsed->readGPR1 = op_storeLoad.registerMem; + registersUsed->readGPR2 = op_storeLoad.registerMem; // PSQ generic stores also access GQR switch (op_storeLoad.mode) { case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR2 = op_storeLoad.registerGQR; + registersUsed->readGPR3 = op_storeLoad.registerGQR; break; default: cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); @@ -337,19 +333,19 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { // fpr store operation - registersUsed->readFPR1 = op_storeLoad.registerData; + registersUsed->readGPR1 = op_storeLoad.registerData; // address is in gpr registers if (op_storeLoad.registerMem.IsValid()) - registersUsed->readGPR1 = op_storeLoad.registerMem; + registersUsed->readGPR2 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2.IsValid()) - registersUsed->readGPR2 = op_storeLoad.registerMem2; + registersUsed->readGPR3 = op_storeLoad.registerMem2; // PSQ generic stores also access GQR switch (op_storeLoad.mode) { case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR3 = op_storeLoad.registerGQR; + registersUsed->readGPR4 = op_storeLoad.registerGQR; break; default: cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); @@ -369,8 +365,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* 
registersUsed) const operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) { // operand read, result written - registersUsed->readFPR1 = op_fpr_r_r.regA; - registersUsed->writtenFPR1 = op_fpr_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r.regA; + registersUsed->writtenGPR1 = op_fpr_r_r.regR; } else if ( operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM || @@ -383,9 +379,9 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const ) { // operand read, result read and (partially) written - registersUsed->readFPR1 = op_fpr_r_r.regA; - registersUsed->readFPR4 = op_fpr_r_r.regR; - registersUsed->writtenFPR1 = op_fpr_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r.regR; + registersUsed->writtenGPR1 = op_fpr_r_r.regR; } else if (operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM || operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR || @@ -397,9 +393,9 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_SUB_BOTTOM) { // operand read, result read and written - registersUsed->readFPR1 = op_fpr_r_r.regA; - registersUsed->readFPR2 = op_fpr_r_r.regR; - registersUsed->writtenFPR1 = op_fpr_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r.regR; + registersUsed->writtenGPR1 = op_fpr_r_r.regR; } else if (operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || @@ -407,8 +403,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM) { // operand read, result read - registersUsed->readFPR1 = op_fpr_r_r.regA; - registersUsed->readFPR2 = op_fpr_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r.regR; } else cemu_assert_unimplemented(); @@ -416,16 +412,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { // fpr operation - registersUsed->readFPR1 = op_fpr_r_r_r.regA; - registersUsed->readFPR2 = op_fpr_r_r_r.regB; - registersUsed->writtenFPR1 = op_fpr_r_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r_r.regB; + registersUsed->writtenGPR1 = op_fpr_r_r_r.regR; // handle partially written result switch (operation) { case PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM: case PPCREC_IML_OP_FPR_ADD_BOTTOM: case PPCREC_IML_OP_FPR_SUB_BOTTOM: - registersUsed->readFPR4 = op_fpr_r_r_r.regR; + registersUsed->readGPR3 = op_fpr_r_r_r.regR; break; case PPCREC_IML_OP_FPR_SUB_PAIR: break; @@ -436,15 +432,15 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { // fpr operation - registersUsed->readFPR1 = op_fpr_r_r_r_r.regA; - registersUsed->readFPR2 = op_fpr_r_r_r_r.regB; - registersUsed->readFPR3 = op_fpr_r_r_r_r.regC; - registersUsed->writtenFPR1 = op_fpr_r_r_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r_r_r.regB; + registersUsed->readGPR3 = op_fpr_r_r_r_r.regC; + registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR; // handle partially written result switch (operation) { case PPCREC_IML_OP_FPR_SELECT_BOTTOM: - registersUsed->readFPR4 = op_fpr_r_r_r_r.regR; + registersUsed->readGPR4 = op_fpr_r_r_r_r.regR; break; case PPCREC_IML_OP_FPR_SUM0: case PPCREC_IML_OP_FPR_SUM1: @@ -464,8 +460,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM || 
operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR) { - registersUsed->readFPR1 = op_fpr_r.regR; - registersUsed->writtenFPR1 = op_fpr_r.regR; + registersUsed->readGPR1 = op_fpr_r.regR; + registersUsed->writtenGPR1 = op_fpr_r.regR; } else cemu_assert_unimplemented(); @@ -473,8 +469,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_FPR_COMPARE) { registersUsed->writtenGPR1 = op_fpr_compare.regR; - registersUsed->readFPR1 = op_fpr_compare.regA; - registersUsed->readFPR2 = op_fpr_compare.regB; + registersUsed->readGPR1 = op_fpr_compare.regA; + registersUsed->readGPR2 = op_fpr_compare.regB; } else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 677da5c2d..28c48569c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -335,7 +335,6 @@ struct IMLUsedRegisters { IMLUsedRegisters() {}; - // GPR union { struct @@ -343,23 +342,11 @@ struct IMLUsedRegisters IMLReg readGPR1; IMLReg readGPR2; IMLReg readGPR3; + IMLReg readGPR4; IMLReg writtenGPR1; IMLReg writtenGPR2; }; }; - // FPR - union - { - struct - { - // note: If destination operand is not fully written (PS0 and PS1) it will be added to the read registers - IMLReg readFPR1; - IMLReg readFPR2; - IMLReg readFPR3; - IMLReg readFPR4; - IMLReg writtenFPR1; - }; - }; bool IsWrittenByRegId(IMLRegID regId) const { @@ -377,17 +364,6 @@ struct IMLUsedRegisters return IsWrittenByRegId(regId); } - bool IsRegIdRead(IMLRegID regId) const - { - if (readGPR1.IsValid() && readGPR1.GetRegID() == regId) - return true; - if (readGPR2.IsValid() && readGPR2.GetRegID() == regId) - return true; - if (readGPR3.IsValid() && readGPR3.GetRegID() == regId) - return true; - return false; - } - template void ForEachWrittenGPR(Fn F) const { @@ -406,27 +382,8 @@ struct IMLUsedRegisters F(readGPR2); if (readGPR3.IsValid()) F(readGPR3); - } - - // temporary (for FPRs) - template - void ForEachWrittenFPR(Fn F) const - { - if (writtenFPR1.IsValid()) - F(writtenFPR1); - } - - template - void ForEachReadFPR(Fn F) const - { - if (readFPR1.IsValid()) - F(readFPR1); - if (readFPR2.IsValid()) - F(readFPR2); - if (readFPR3.IsValid()) - F(readFPR3); - if (readFPR4.IsValid()) - F(readFPR4); + if (readGPR4.IsValid()) + F(readGPR4); } template @@ -439,21 +396,12 @@ struct IMLUsedRegisters F(readGPR2, false); if (readGPR3.IsValid()) F(readGPR3, false); + if (readGPR4.IsValid()) + F(readGPR4, false); if (writtenGPR1.IsValid()) F(writtenGPR1, true); if (writtenGPR2.IsValid()) F(writtenGPR2, true); - // FPRs - if (readFPR1.IsValid()) - F(readFPR1, false); - if (readFPR2.IsValid()) - F(readFPR2, false); - if (readFPR3.IsValid()) - F(readFPR3, false); - if (readFPR4.IsValid()) - F(readFPR4, false); - if (writtenFPR1.IsValid()) - F(writtenFPR1, true); } }; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index e5bec6c0f..cb61fecf2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -57,15 +57,15 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI // check if FPR is overwritten (we can actually ignore read operations?) 
imlInstruction->CheckRegisterUsage(®istersUsed); - if (registersUsed.writtenFPR1.IsValidAndSameRegID(fprIndex)) + if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR1.IsValidAndSameRegID(fprIndex)) + if (registersUsed.readGPR1.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR2.IsValidAndSameRegID(fprIndex)) + if (registersUsed.readGPR2.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR3.IsValidAndSameRegID(fprIndex)) + if (registersUsed.readGPR3.IsValidAndSameRegID(fprIndex)) break; - if (registersUsed.readFPR4.IsValidAndSameRegID(fprIndex)) + if (registersUsed.readGPR4.IsValidAndSameRegID(fprIndex)) break; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index c7764fa3e..910d60e77 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1502,22 +1502,12 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML raLivenessRange* subrange = regToSubrange.find(gprId)->second; IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); }); - gprTracking.ForEachReadFPR([&](IMLReg gprReg) { - IMLRegID gprId = gprReg.GetRegID(); - raLivenessRange* subrange = regToSubrange.find(gprId)->second; - IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); - }); pos = {(sint32)index, false}; gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) { IMLRegID gprId = gprReg.GetRegID(); raLivenessRange* subrange = regToSubrange.find(gprId)->second; IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); }); - gprTracking.ForEachWrittenFPR([&](IMLReg gprReg) { - IMLRegID gprId = gprReg.GetRegID(); - raLivenessRange* subrange = regToSubrange.find(gprId)->second; - IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); - }); // check fixed register requirements IMLFixedRegisters fixedRegs; GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 1ad411904..733c6e5a5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -334,9 +334,6 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext); - //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); - //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); - return true; } From f309d5d8a8cc95872fa7630ec23828601a67f763 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sun, 27 Oct 2024 14:49:24 +0100 Subject: [PATCH 58/64] PPCRec: Code cleanup --- .../Recompiler/BackendX64/BackendX64.cpp | 49 ---- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 14 +- .../Recompiler/IML/IMLInstruction.cpp | 259 +----------------- .../Espresso/Recompiler/IML/IMLInstruction.h | 24 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 164 ++++++----- .../Recompiler/IML/IMLRegisterAllocator.h | 7 +- .../IML/IMLRegisterAllocatorRanges.cpp | 154 +++++------ .../IML/IMLRegisterAllocatorRanges.h | 18 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 21 +- .../Recompiler/PPCRecompilerImlGen.cpp | 1 - 10 files changed, 188 insertions(+), 523 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp 
b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index cced18c4f..5ef713b9b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -172,18 +172,6 @@ void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFun return PPCInterpreter_getCurrentInstance(); } -void ATTR_MS_ABI PPCRecompiler_getTBL(PPCInterpreter_t* hCPU, uint32 gprIndex) -{ - uint64 coreTime = coreinit::coreinit_getTimerTick(); - hCPU->gpr[gprIndex] = (uint32)(coreTime&0xFFFFFFFF); -} - -void ATTR_MS_ABI PPCRecompiler_getTBU(PPCInterpreter_t* hCPU, uint32 gprIndex) -{ - uint64 coreTime = coreinit::coreinit_getTimerTick(); - hCPU->gpr[gprIndex] = (uint32)((coreTime>>32)&0xFFFFFFFF); -} - bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) @@ -340,43 +328,6 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); return true; } - else if( imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) - { - // according to MS ABI the caller needs to save: - // RAX, RCX, RDX, R8, R9, R10, R11 - - uint32 ppcAddress = imlInstruction->op_macro.param; - uint32 sprId = imlInstruction->op_macro.param2&0xFFFF; - uint32 gprIndex = (imlInstruction->op_macro.param2>>16)&0x1F; - // update instruction pointer - x64Gen_mov_mem32Reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); - // set parameters - x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, X86_REG_RSP); - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, gprIndex); - // restore stackpointer to original RSP - x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); - // push hCPU on stack - x64Gen_push_reg64(x64GenContext, X86_REG_RCX); - // reserve space on stack for call parameters - x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11 + 8); - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0); - // call function - if( sprId == SPR_TBL ) - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_getTBL); - else if( sprId == SPR_TBU ) - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_getTBU); - else - assert_dbg(); - x64Gen_call_reg64(x64GenContext, X86_REG_RAX); - // restore hCPU from stack - x64Gen_add_reg64_imm32(x64GenContext, X86_REG_RSP, 8 * 11 + 8); - x64Gen_pop_reg64(x64GenContext, X86_REG_RSP); - // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R15, (uint64)ppcRecompilerInstanceData); - // MOV R13, memory_base - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R13, (uint64)memory_base); - return true; - } else { debug_printf("Unknown recompiler macro operation %d\n", imlInstruction->operation); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 4850ed816..1cfb470de 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -97,23 +97,23 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - if 
(subrangeItr->interval2.start.GetInstructionIndexEx() == offset) + if (subrangeItr->interval.start.GetInstructionIndexEx() == offset) { - if(subrangeItr->interval2.start.IsInstructionIndex() && !subrangeItr->interval2.start.IsOnInputEdge()) + if(subrangeItr->interval.start.IsInstructionIndex() && !subrangeItr->interval.start.IsOnInputEdge()) currentLineText.add("."); else currentLineText.add("|"); currentLineText.addFmt("{:<4}", subrangeItr->GetVirtualRegister()); } - else if (subrangeItr->interval2.end.GetInstructionIndexEx() == offset) + else if (subrangeItr->interval.end.GetInstructionIndexEx() == offset) { - if(subrangeItr->interval2.end.IsInstructionIndex() && !subrangeItr->interval2.end.IsOnOutputEdge()) + if(subrangeItr->interval.end.IsInstructionIndex() && !subrangeItr->interval.end.IsOnOutputEdge()) currentLineText.add("* "); else currentLineText.add("| "); } - else if (subrangeItr->interval2.ContainsInstructionIndexEx(offset)) + else if (subrangeItr->interval.ContainsInstructionIndexEx(offset)) { currentLineText.add("| "); } @@ -374,10 +374,6 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di { strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); } - else if (inst.operation == PPCREC_IML_MACRO_MFTB) - { - strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); - } else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES) { strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 63714cdb3..665480277 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -4,6 +4,7 @@ #include "../PPCRecompiler.h" #include "../PPCRecompilerIml.h" +// return true if an instruction has side effects on top of just reading and writing registers bool IMLInstruction::HasSideEffects() const { bool hasSideEffects = true; @@ -160,7 +161,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_MACRO) { - if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB) + if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE) { // no effect on registers } @@ -482,15 +483,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } } -//#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) -IMLReg replaceRegisterId(IMLReg reg, IMLRegID oldId, IMLRegID newId) -{ - if (reg.GetRegID() != oldId) - return reg; - reg.SetRegID(newId); - return reg; -} - IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map& translationTable) { if (reg.IsInvalid()) @@ -502,26 +494,6 @@ IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map& translationTable) { if (type == PPCREC_IML_TYPE_R_NAME) @@ -594,7 +566,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr } else if (type == PPCREC_IML_TYPE_MACRO) { - if (operation == 
PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_MFTB || operation == PPCREC_IML_MACRO_COUNT_CYCLES) + if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_COUNT_CYCLES) { // no effect on registers } @@ -717,228 +689,3 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr cemu_assert_unimplemented(); } } - -void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]) -{ - if (type == PPCREC_IML_TYPE_R_NAME) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_NAME_R) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_R) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_S32) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_R_S32) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_R_R) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_NO_OP) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_MACRO) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_LOAD) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_STORE) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_STORE_INDEXED) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) - { - ; - } - else if (type == PPCREC_IML_TYPE_CALL_IMM) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_FPR_LOAD) - { - op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_STORE) - { - op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_R_R) - { - op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_R_R_R) - { - op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) - { - op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regA 
= replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_R) - { - op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_COMPARE) - { - op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced); - } - else - { - cemu_assert_unimplemented(); - } -} - -void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced) -{ - if (type == PPCREC_IML_TYPE_R_NAME) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_NAME_R) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_R) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_S32) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_R_R_S32_CARRY) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_R_R_R || type == PPCREC_IML_TYPE_R_R_R_CARRY) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_NO_OP) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_MACRO) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_LOAD) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_STORE) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_STORE_INDEXED) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_CALL_IMM) - { - // not affected - } - else if (type == PPCREC_IML_TYPE_FPR_LOAD) - { - op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_STORE) - { - op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_R_R) - { - op_fpr_r_r.regR = replaceRegisterId(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r.regA = replaceRegisterId(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_R_R_R) - { - op_fpr_r_r_r.regR = replaceRegisterId(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regA = replaceRegisterId(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r.regB = replaceRegisterId(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); - } - else if (type 
== PPCREC_IML_TYPE_FPR_R_R_R_R) - { - op_fpr_r_r_r_r.regR = replaceRegisterId(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regA = replaceRegisterId(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regB = replaceRegisterId(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced); - op_fpr_r_r_r_r.regC = replaceRegisterId(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced); - } - else if (type == PPCREC_IML_TYPE_FPR_R) - { - op_fpr_r.regR = replaceRegisterId(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced); - } - else - { - cemu_assert_unimplemented(); - } -} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 28c48569c..e58511c1b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -197,7 +197,6 @@ enum PPCREC_IML_MACRO_B_FAR, // branch to different function PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount PPCREC_IML_MACRO_HLE, // HLE function call - PPCREC_IML_MACRO_MFTB, // get TB register value (low or high) PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpeter // debugging PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak @@ -335,19 +334,6 @@ struct IMLUsedRegisters { IMLUsedRegisters() {}; - union - { - struct - { - IMLReg readGPR1; - IMLReg readGPR2; - IMLReg readGPR3; - IMLReg readGPR4; - IMLReg writtenGPR1; - IMLReg writtenGPR2; - }; - }; - bool IsWrittenByRegId(IMLRegID regId) const { if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId) @@ -404,6 +390,12 @@ struct IMLUsedRegisters F(writtenGPR2, true); } + IMLReg readGPR1; + IMLReg readGPR2; + IMLReg readGPR3; + IMLReg readGPR4; + IMLReg writtenGPR1; + IMLReg writtenGPR2; }; struct IMLInstruction @@ -575,7 +567,6 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_TO_REG || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || - type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || @@ -788,9 +779,6 @@ struct IMLInstruction bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. 
Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used void RewriteGPR(const std::unordered_map& translationTable); - void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]); - void ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced); - }; // architecture specific constants diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 910d60e77..d411be14e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -192,7 +192,7 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe } #endif -uint32 PPCRecRA_getNextIterationIndex() +uint32 IMLRA_GetNextIterationIndex() { static uint32 recRACurrentIterationIndex = 0; recRACurrentIterationIndex++; @@ -231,9 +231,9 @@ bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex return currentSegment->raInfo.isPartOfProcessedLoop; } -void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase) +void IMLRA_DetectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase) { - uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); + uint32 iterationIndex = IMLRA_GetNextIterationIndex(); imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex; if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase)) { @@ -241,7 +241,7 @@ void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSe } } -void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void IMLRA_IdentifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { if (imlSegment->nextSegmentIsUncertain) return; @@ -255,13 +255,13 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml // check if this segment has a branch that goes backwards (potential complex loop) if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex) { - PPCRecRA_detectLoop(ppcImlGenContext, imlSegment); + IMLRA_DetectLoop(ppcImlGenContext, imlSegment); } } #define SUBRANGE_LIST_SIZE (128) -sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition) +sint32 IMLRA_CountDistanceUntilNextUse(raLivenessRange* subrange, raInstructionEdge startPosition) { for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++) { @@ -292,8 +292,8 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLi return fixedReqEntry.pos.GetRaw() - startPosition.GetRaw(); } } - cemu_assert_debug(range->interval2.end.IsInstructionIndex()); - return range->interval2.end.GetRaw() - startPosition.GetRaw(); + cemu_assert_debug(range->interval.end.IsInstructionIndex()); + return range->interval.end.GetRaw() - startPosition.GetRaw(); } sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister) @@ -343,15 +343,15 @@ sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegmen subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } - if (subrangeItr->interval2.ContainsEdge(startPosition)) + if (subrangeItr->interval.ContainsEdge(startPosition)) return 0; - if (subrangeItr->interval2.end 
< startPosition) + if (subrangeItr->interval.end < startPosition) { subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } - cemu_assert_debug(startPosition <= subrangeItr->interval2.start); - sint32 currentDist = subrangeItr->interval2.start.GetRaw() - startPosition.GetRaw(); + cemu_assert_debug(startPosition <= subrangeItr->interval.start); + sint32 currentDist = subrangeItr->interval.start.GetRaw() - startPosition.GetRaw(); minDistance = std::min(minDistance, currentDist); subrangeItr = subrangeItr->link_allSegmentRanges.next; } @@ -377,7 +377,7 @@ struct IMLRALivenessTimeline for (size_t f = 0; f < count; f++) { raLivenessRange* liverange = activeRanges[f]; - if (liverange->interval2.end < expireUpTo) // this was <= but since end is not inclusive we need to use < + if (liverange->interval.end < expireUpTo) // this was <= but since end is not inclusive we need to use < { #ifdef CEMU_DEBUG_ASSERT if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken)) @@ -443,7 +443,7 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } - if (subrange->interval2.IsOverlapping(subrangeItr->interval2)) + if (subrange->interval.IsOverlapping(subrangeItr->interval)) { if (subrangeItr->GetPhysicalRegister() >= 0) physRegSet.SetReserved(subrangeItr->GetPhysicalRegister()); @@ -456,7 +456,7 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { - return lhs->interval2.start < rhs->interval2.start; + return lhs->interval.start < rhs->interval.start; } void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) @@ -467,8 +467,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - if (count >= 4096) - assert_dbg(); + cemu_assert(count < 4096); subrangeList[count] = subrangeItr; count++; // next @@ -526,6 +525,9 @@ raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId) struct raFixedRegRequirementWithVGPR { + raFixedRegRequirementWithVGPR(raInstructionEdge pos, IMLPhysRegisterSet allowedReg, IMLRegID regId) + : pos(pos), allowedReg(allowedReg), regId(regId) {} + raInstructionEdge pos; IMLPhysRegisterSet allowedReg; IMLRegID regId; @@ -560,7 +562,7 @@ boost::container::small_vector IMLRA_GetRangeWithFixedRegRe boost::container::small_vector rangeList; for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) { - if (!currentRange->interval2.ContainsEdge(pos)) + if (!currentRange->interval.ContainsEdge(pos)) continue; IMLPhysRegisterSet allowedRegs; if (!currentRange->GetAllowedRegistersEx(allowedRegs)) @@ -574,7 +576,7 @@ boost::container::small_vector IMLRA_GetRangeWithFixedRegRe void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border - // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. 
This isn't always necessary + // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. This can be avoided in some cases for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;) { IMLPhysRegisterSet allowedRegs; @@ -588,10 +590,10 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment currentRange = currentRange->link_allSegmentRanges.next; continue; } - if (currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) + if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment()) { raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next; - PPCRecRA_explodeRange(ppcImlGenContext, currentRange); + IMLRA_ExplodeRangeCluster(ppcImlGenContext, currentRange); currentRange = nextRange; continue; } @@ -638,9 +640,9 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment for (auto& range : overlappingRanges) { - if (range->interval2.start < entry.pos) + if (range->interval.start < entry.pos) { - PPCRecRA_splitLocalSubrange2(ppcImlGenContext, range, entry.pos, true); + IMLRA_SplitRange(ppcImlGenContext, range, entry.pos, true); } } } @@ -704,7 +706,7 @@ void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge start distance = endPos.GetRaw() - startPos.GetRaw(); } -void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx); +static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx); class RASpillStrategy { @@ -737,8 +739,8 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) { - raInstructionEdge currentRangeStart = currentRange->interval2.start; - sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + raInstructionEdge currentRangeStart = currentRange->interval.start; + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); cemu_assert_debug(localRangeHoleCutting.distance == -1); cemu_assert_debug(strategyCost == INT_MAX); if (!currentRangeStart.ConnectsToPreviousSegment()) @@ -746,7 +748,7 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy cemu_assert_debug(currentRangeStart.GetRaw() >= 0); for (auto candidate : timeline.activeRanges) { - if (candidate->interval2.ExtendsIntoNextSegment()) + if (candidate->interval.ExtendsIntoNextSegment()) continue; // new checks (Oct 2024): if (candidate == currentRange) @@ -756,7 +758,7 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) continue; - sint32 distance2 = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart); + sint32 distance2 = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart); IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2); if (distance2 < 2) continue; @@ -785,18 +787,18 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override { cemu_assert_debug(strategyCost != INT_MAX); - sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); - raInstructionEdge currentRangeStart = currentRange->interval2.start; + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); + 
raInstructionEdge currentRangeStart = currentRange->interval.start; raInstructionEdge holeStartPosition = currentRangeStart; raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance; raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange; - if (collisionRange->interval2.start < holeStartPosition) + if (collisionRange->interval.start < holeStartPosition) { - collisionRange = PPCRecRA_splitLocalSubrange2(nullptr, collisionRange, holeStartPosition, true); - cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point - cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough + collisionRange = IMLRA_SplitRange(nullptr, collisionRange, holeStartPosition, true); + cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point + cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough } else { @@ -805,7 +807,7 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy // we may also have to cut the current range to fit partially into the hole if (requiredSize2 > localRangeHoleCutting.distance) { - raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true); + raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true); if (tailRange) { cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers @@ -815,7 +817,7 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy // verify that the hole is large enough if (collisionRange) { - cemu_assert_debug(!collisionRange->interval2.IsOverlapping(currentRange->interval2)); + cemu_assert_debug(!collisionRange->interval.IsOverlapping(currentRange->interval)); } } @@ -840,9 +842,9 @@ class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs) { - sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); - raInstructionEdge currentRangeStart = currentRange->interval2.start; + raInstructionEdge currentRangeStart = currentRange->interval.start; cemu_assert_debug(strategyCost == INT_MAX); availableRegisterHole.distance = -1; availableRegisterHole.physRegister = -1; @@ -888,9 +890,9 @@ class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override { cemu_assert_debug(strategyCost != INT_MAX); - raInstructionEdge currentRangeStart = currentRange->interval2.start; + raInstructionEdge currentRangeStart = currentRange->interval.start; // use available register - raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true); + raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + 
availableRegisterHole.distance, true); if (tailRange) { cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers @@ -918,16 +920,16 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) { - raInstructionEdge currentRangeStart = currentRange->interval2.start; + raInstructionEdge currentRangeStart = currentRange->interval.start; if (currentRangeStart.ConnectsToPreviousSegment()) currentRangeStart.Set(0, true); - sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); cemu_assert_debug(strategyCost == INT_MAX); explodeRange.range = nullptr; explodeRange.distance = -1; for (auto candidate : timeline.activeRanges) { - if (!candidate->interval2.ExtendsIntoNextSegment()) + if (!candidate->interval.ExtendsIntoNextSegment()) continue; // new checks (Oct 2024): if (candidate == currentRange) @@ -937,7 +939,7 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) continue; - sint32 distance = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart); + sint32 distance = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart); IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); if (distance < 2) continue; @@ -961,16 +963,16 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override { - raInstructionEdge currentRangeStart = currentRange->interval2.start; + raInstructionEdge currentRangeStart = currentRange->interval.start; if (currentRangeStart.ConnectsToPreviousSegment()) currentRangeStart.Set(0, true); - sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance(); + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); // explode range - PPCRecRA_explodeRange(nullptr, explodeRange.range); + IMLRA_ExplodeRangeCluster(nullptr, explodeRange.range); // split current subrange if necessary if (requiredSize2 > explodeRange.distance) { - raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + explodeRange.distance, true); + raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + explodeRange.distance, true); if (tailRange) { cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers @@ -1005,7 +1007,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1); for (auto candidate : timeline.activeRanges) { - if (!candidate->interval2.ExtendsIntoNextSegment()) + if (!candidate->interval.ExtendsIntoNextSegment()) continue; // only select candidates that clash with current subrange if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange) @@ -1037,7 +1039,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override { cemu_assert_debug(strategyCost != INT_MAX); - PPCRecRA_explodeRange(ctx, explodeRange.range); + IMLRA_ExplodeRangeCluster(ctx, explodeRange.range); } private: @@ -1056,16 +1058,16 @@ void 
IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocato if (seg->imlList.empty()) return; // there can be no fixed register requirements if there are no instructions - raInstructionEdge firstPos = currentRange->interval2.start; - if (currentRange->interval2.start.ConnectsToPreviousSegment()) + raInstructionEdge firstPos = currentRange->interval.start; + if (currentRange->interval.start.ConnectsToPreviousSegment()) firstPos.SetRaw(0); - else if (currentRange->interval2.start.ConnectsToNextSegment()) + else if (currentRange->interval.start.ConnectsToNextSegment()) firstPos.Set(seg->imlList.size() - 1, false); - raInstructionEdge lastPos = currentRange->interval2.end; - if (currentRange->interval2.end.ConnectsToPreviousSegment()) + raInstructionEdge lastPos = currentRange->interval.end; + if (currentRange->interval.end.ConnectsToPreviousSegment()) lastPos.SetRaw(0); - else if (currentRange->interval2.end.ConnectsToNextSegment()) + else if (currentRange->interval.end.ConnectsToNextSegment()) lastPos.Set(seg->imlList.size() - 1, false); cemu_assert_debug(firstPos <= lastPos); @@ -1093,7 +1095,7 @@ void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocato void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet) { cemu_assert_debug(currentRange->imlSegment == imlSegment); - if (currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment()) + if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment()) { auto clusterRanges = currentRange->GetAllSubrangesInCluster(); for (auto& rangeIt : clusterRanges) @@ -1128,7 +1130,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon while (subrangeItr) { - raInstructionEdge currentRangeStart = subrangeItr->interval2.start; // used to be currentIndex before refactor + raInstructionEdge currentRangeStart = subrangeItr->interval.start; // used to be currentIndex before refactor PPCRecRA_debugValidateSubrange(subrangeItr); livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? 
lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges @@ -1204,7 +1206,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon selectedStrategy = &newStrategy; }; - if (!subrangeItr->interval2.ExtendsIntoNextSegment()) + if (!subrangeItr->interval.ExtendsIntoNextSegment()) { // range ends in current segment, use local strategies // evaluate strategy: Cut hole into local subrange @@ -1232,9 +1234,9 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon else { // none of the evulated strategies can be applied, this should only happen if the segment extends into the next segment(s) for which we have no good strategy - cemu_assert_debug(subrangeItr->interval2.ExtendsPreviousSegment()); + cemu_assert_debug(subrangeItr->interval.ExtendsPreviousSegment()); // alternative strategy if we have no other choice: explode current range - PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr); + IMLRA_ExplodeRangeCluster(ppcImlGenContext, subrangeItr); } return false; } @@ -1336,7 +1338,7 @@ void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; - PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment); + IMLRA_IdentifyLoop(ppcImlGenContext, imlSegment); } } @@ -1411,7 +1413,7 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive raInterval interval; interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true); - raLivenessRange* subrange = PPCRecRA_createSubrange2(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end); + raLivenessRange* subrange = IMLRA_CreateRange(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end); // traverse forward if (abstractRange->usageEnd == RA_INTER_RANGE_END) { @@ -1422,7 +1424,7 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx { subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name); subrange->subrangeBranchTaken->previousRanges.push_back(subrange); - cemu_assert_debug(subrange->subrangeBranchTaken->interval2.ExtendsPreviousSegment()); + cemu_assert_debug(subrange->subrangeBranchTaken->interval.ExtendsPreviousSegment()); } } if (imlSegment->nextSegmentBranchNotTaken) @@ -1432,7 +1434,7 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx { subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name); subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange); - cemu_assert_debug(subrange->subrangeBranchNotTaken->interval2.ExtendsPreviousSegment()); + cemu_assert_debug(subrange->subrangeBranchNotTaken->interval.ExtendsPreviousSegment()); } } } @@ -1771,7 +1773,7 @@ void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange) subrange->_noLoad = isOverwritten; subrange->hasStore = isWritten; - if (subrange->interval2.ExtendsPreviousSegment()) + if (subrange->interval.ExtendsPreviousSegment()) subrange->_noLoad = true; } @@ -1796,7 +1798,7 @@ void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, if (subrange->hasStoreDelayed) return; // no need to traverse this subrange 
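The _noLoad / hasStore flags computed a few lines above feed the later load/store elision in IMLRA_GenerateSegmentMoveInstructions2: a range whose first access is a write never needs to load the previous register value, and a range with no writes never needs a store back (ranges extending in from a previous segment are also marked _noLoad since the value is already live in a register). A rough sketch of that rule, using a hypothetical Access record rather than the real access-location type:

#include <algorithm>
#include <vector>

struct Access { bool isRead; bool isWrite; };

// noLoad: the first access overwrites the value without reading it, so no load is required
// hasStore: at least one access writes, so the final value has to be written back
void AnalyzeRangeAccesses(const std::vector<Access>& accesses, bool& noLoad, bool& hasStore)
{
    noLoad = !accesses.empty() && accesses.front().isWrite && !accesses.front().isRead;
    hasStore = std::any_of(accesses.begin(), accesses.end(),
                           [](const Access& a) { return a.isWrite; });
}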
IMLSegment* imlSegment = subrange->imlSegment; - if (!subrange->interval2.ExtendsIntoNextSegment()) + if (!subrange->interval.ExtendsIntoNextSegment()) { // ending segment if (info->subrangeCount >= SUBRANGE_LIST_SIZE) @@ -1839,7 +1841,7 @@ void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange) { - if (!subrange->interval2.ExtendsIntoNextSegment()) + if (!subrange->interval.ExtendsIntoNextSegment()) return; // analyze data flow across segments (if this segment has writes) if (subrange->hasStore) @@ -1847,7 +1849,7 @@ static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange) subrangeEndingInfo_t writeEndingInfo; writeEndingInfo.subrangeCount = 0; writeEndingInfo.hasUndefinedEndings = false; - _findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo); + _findSubrangeWriteEndings(subrange, IMLRA_GetNextIterationIndex(), 0, &writeEndingInfo); if (writeEndingInfo.hasUndefinedEndings == false) { // get cost of delaying store into endings @@ -1924,7 +1926,7 @@ void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSeg { currentEdge.Set(i, false); // set to instruction index on output edge // activate ranges which begin before or during this instruction - while (currentRange && currentRange->interval2.start <= currentEdge) + while (currentRange && currentRange->interval.start <= currentEdge) { cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict @@ -1938,7 +1940,7 @@ void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSeg auto it = activeRanges.begin(); while (it != activeRanges.end()) { - if ((*it)->interval2.end <= currentEdge) + if ((*it)->interval.end <= currentEdge) { virtId2PhysReg.erase((*it)->GetVirtualRegister()); it = activeRanges.erase(it); @@ -1981,7 +1983,7 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM { if (!currentRange->_noLoad) { - cemu_assert_debug(currentRange->interval2.ExtendsIntoNextSegment()); + cemu_assert_debug(currentRange->interval.ExtendsIntoNextSegment()); rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); } currentRange = currentRange->link_allSegmentRanges.next; @@ -2001,9 +2003,9 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM while (currentRange) { - if (!currentRange->interval2.IsNextSegmentOnly() && currentRange->interval2.end > edge) + if (!currentRange->interval.IsNextSegmentOnly() && currentRange->interval.end > edge) { - currentRange->interval2.SetEnd(edge); + currentRange->interval.SetEnd(edge); } currentRange = currentRange->link_allSegmentRanges.next; } @@ -2025,9 +2027,8 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM boost::container::small_vector activeRanges; // first we add all the ranges that extend from the previous segment, some of these will end immediately at the first instruction so we might need to store them early raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; - // make all ranges active that start on RA_INTER_RANGE_START - while (currentRange && currentRange->interval2.start.ConnectsToPreviousSegment()) + while (currentRange && 
currentRange->interval.start.ConnectsToPreviousSegment()) { activeRanges.push_back(currentRange); currentRange = currentRange->link_allSegmentRanges.next; @@ -2038,7 +2039,7 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM firstOutputEdge.Set(0, false); while (it != activeRanges.end()) { - if ((*it)->interval2.end < firstOutputEdge) + if ((*it)->interval.end < firstOutputEdge) { raLivenessRange* storedRange = *it; if (storedRange->hasStore) @@ -2055,7 +2056,7 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM raInstructionEdge curEdge; // input edge curEdge.SetRaw(i * 2 + 1); // +1 to include ranges that start at the output of the instruction - while (currentRange && currentRange->interval2.start <= curEdge) + while (currentRange && currentRange->interval.start <= curEdge) { if (!currentRange->_noLoad) { @@ -2072,11 +2073,9 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM auto it = activeRanges.begin(); while (it != activeRanges.end()) { - if ((*it)->interval2.end <= curEdge) + if ((*it)->interval.end <= curEdge) { // range expires - // we cant erase it from virtId2PhysReg right away because a store might happen before the last use (the +1 thing above) - // todo - check hasStore raLivenessRange* storedRange = *it; if (storedRange->hasStore) @@ -2084,7 +2083,6 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister())); } - it = activeRanges.erase(it); continue; } @@ -2109,7 +2107,7 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM } while (currentRange) { - cemu_assert_debug(currentRange->interval2.IsNextSegmentOnly()); + cemu_assert_debug(currentRange->interval.IsNextSegmentOnly()); cemu_assert_debug(!currentRange->_noLoad); rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); currentRange = currentRange->link_allSegmentRanges.next; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index b5a7610b2..0a54e4cbd 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -60,10 +60,10 @@ class IMLPhysRegisterSet } // returns index of first available register. 
Do not call when HasAnyAvailable() == false - uint32 GetFirstAvailableReg() + IMLPhysReg GetFirstAvailableReg() { cemu_assert_debug(m_regBitmask != 0); - uint32 regIndex = 0; + sint32 regIndex = 0; auto tmp = m_regBitmask; while ((tmp & 0xFF) == 0) { @@ -80,7 +80,7 @@ class IMLPhysRegisterSet // returns index of next available register (search includes any register index >= startIndex) // returns -1 if there is no more register - sint32 GetNextAvailableReg(sint32 startIndex) const + IMLPhysReg GetNextAvailableReg(sint32 startIndex) const { if (startIndex >= 64) return -1; @@ -111,7 +111,6 @@ class IMLPhysRegisterSet uint64 m_regBitmask{ 0 }; }; - struct IMLRegisterAllocatorParameters { inline IMLPhysRegisterSet& GetPhysRegPool(IMLRegFormat regFormat) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 2c930651b..1ac884cd8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -3,7 +3,7 @@ #include "IMLRegisterAllocatorRanges.h" #include "util/helpers/MemoryPool.h" -uint32 PPCRecRA_getNextIterationIndex(); +uint32 IMLRA_GetNextIterationIndex(); IMLRegID raLivenessRange::GetVirtualRegister() const { @@ -20,12 +20,12 @@ IMLName raLivenessRange::GetName() const return name; } -void raLivenessRange::SetPhysicalRegister(sint32 physicalRegister) +void raLivenessRange::SetPhysicalRegister(IMLPhysReg physicalRegister) { this->physicalRegister = physicalRegister; } -void raLivenessRange::SetPhysicalRegisterForCluster(sint32 physicalRegister) +void raLivenessRange::SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister) { auto clusterRanges = GetAllSubrangesInCluster(); for(auto& range : clusterRanges) @@ -34,7 +34,7 @@ void raLivenessRange::SetPhysicalRegisterForCluster(sint32 physicalRegister) boost::container::small_vector raLivenessRange::GetAllSubrangesInCluster() { - uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); + uint32 iterationIndex = IMLRA_GetNextIterationIndex(); boost::container::small_vector subranges; subranges.push_back(this); this->lastIterationIndex = iterationIndex; @@ -87,7 +87,7 @@ void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uin bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters) { - uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); + uint32 iterationIndex = IMLRA_GetNextIterationIndex(); allowedRegisters.SetAllAvailable(); GetAllowedRegistersExRecursive(this, iterationIndex, allowedRegisters); return !allowedRegisters.HasAllAvailable(); @@ -96,7 +96,7 @@ bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool) { IMLPhysRegisterSet fixedRegRequirements = regPool; - if(interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment()) + if(interval.ExtendsPreviousSegment() || interval.ExtendsIntoNextSegment()) { auto clusterRanges = GetAllSubrangesInCluster(); for(auto& subrange : clusterRanges) @@ -203,7 +203,7 @@ void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenes MemoryPoolPermanentObjects memPool_livenessSubrange(4096); // startPosition and endPosition are inclusive -raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, 
raInstructionEdge endPosition) +raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition) { raLivenessRange* range = memPool_livenessSubrange.acquireObj(); range->previousRanges.clear(); @@ -212,8 +212,8 @@ raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, range->imlSegment = imlSegment; cemu_assert_debug(startPosition <= endPosition); - range->interval2.start = startPosition; - range->interval2.end = endPosition; + range->interval.start = startPosition; + range->interval.end = endPosition; // register mapping range->virtualRegister = virtualRegister; @@ -233,42 +233,42 @@ raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, return range; } -void _unlinkSubrange(raLivenessRange* subrange) +void _unlinkSubrange(raLivenessRange* range) { - IMLSegment* imlSegment = subrange->imlSegment; - PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, subrange); - PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, subrange); + IMLSegment* imlSegment = range->imlSegment; + PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range); + PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range); // unlink reverse references - if(subrange->subrangeBranchTaken) - subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange)); - if(subrange->subrangeBranchNotTaken) - subrange->subrangeBranchNotTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange)); - subrange->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1; - subrange->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1; + if(range->subrangeBranchTaken) + range->subrangeBranchTaken->previousRanges.erase(std::find(range->subrangeBranchTaken->previousRanges.begin(), range->subrangeBranchTaken->previousRanges.end(), range)); + if(range->subrangeBranchNotTaken) + range->subrangeBranchNotTaken->previousRanges.erase(std::find(range->subrangeBranchNotTaken->previousRanges.begin(), range->subrangeBranchNotTaken->previousRanges.end(), range)); + range->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1; + range->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1; // remove forward references - for(auto& prev : subrange->previousRanges) + for(auto& prev : range->previousRanges) { - if(prev->subrangeBranchTaken == subrange) + if(prev->subrangeBranchTaken == range) prev->subrangeBranchTaken = nullptr; - if(prev->subrangeBranchNotTaken == subrange) + if(prev->subrangeBranchNotTaken == range) prev->subrangeBranchNotTaken = nullptr; } - subrange->previousRanges.clear(); + range->previousRanges.clear(); } -void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) +void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range) { - _unlinkSubrange(subrange); - subrange->list_accessLocations.clear(); - subrange->list_fixedRegRequirements.clear(); - memPool_livenessSubrange.releaseObj(subrange); + _unlinkSubrange(range); + range->list_accessLocations.clear(); + range->list_fixedRegRequirements.clear(); + 
memPool_livenessSubrange.releaseObj(range); } -void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) +void IMLRA_DeleteRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range) { - auto clusterRanges = subrange->GetAllSubrangesInCluster(); + auto clusterRanges = range->GetAllSubrangesInCluster(); for (auto& subrange : clusterRanges) - PPCRecRA_deleteSubrange(ppcImlGenContext, subrange); + IMLRA_DeleteRange(ppcImlGenContext, subrange); } void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) @@ -277,13 +277,13 @@ void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) { raLivenessRange* cur; while(cur = seg->raInfo.linkedList_allSubranges) - PPCRecRA_deleteSubrange(ppcImlGenContext, cur); + IMLRA_DeleteRange(ppcImlGenContext, cur); seg->raInfo.linkedList_allSubranges = nullptr; seg->raInfo.linkedList_perVirtualRegister.clear(); } } -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange) +void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange) { #ifdef CEMU_DEBUG_ASSERT PPCRecRA_debugValidateSubrange(subrange); @@ -322,17 +322,17 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan subrange->list_fixedRegRequirements.push_back(fixedReg); absorbedSubrange->list_fixedRegRequirements.clear(); - subrange->interval2.end = absorbedSubrange->interval2.end; + subrange->interval.end = absorbedSubrange->interval.end; PPCRecRA_debugValidateSubrange(subrange); - PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange); + IMLRA_DeleteRange(ppcImlGenContext, absorbedSubrange); } -// remove all inter-segment connections from the range cluster and split it into local ranges (also removes empty ranges) -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange) +// remove all inter-segment connections from the range cluster and split it into local ranges. 
Ranges are trimmed and if they have no access location they will be removed +void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange) { - cemu_assert_debug(originRange->interval2.ExtendsPreviousSegment() || originRange->interval2.ExtendsIntoNextSegment()); // only call this on ranges that span multiple segments + cemu_assert_debug(originRange->interval.ExtendsPreviousSegment() || originRange->interval.ExtendsIntoNextSegment()); // only call this on ranges that span multiple segments auto clusterRanges = originRange->GetAllSubrangesInCluster(); for (auto& subrange : clusterRanges) { @@ -340,7 +340,7 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange continue; raInterval interval; interval.SetInterval(subrange->list_accessLocations.front().pos, subrange->list_accessLocations.back().pos); - raLivenessRange* newSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end); + raLivenessRange* newSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end); // copy locations and fixed reg indices newSubrange->list_accessLocations = subrange->list_accessLocations; newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements; @@ -351,17 +351,17 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange // validate if(!newSubrange->list_accessLocations.empty()) { - cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval2.start); - cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval2.end); + cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval.start); + cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval.end); } if(!newSubrange->list_fixedRegRequirements.empty()) { - cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval2.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters() - cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval2.end); + cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters() + cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval.end); } } - // remove subranges - PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange); + // delete the original range cluster + IMLRA_DeleteRangeCluster(ppcImlGenContext, originRange); } #ifdef CEMU_DEBUG_ASSERT @@ -408,19 +408,19 @@ void IMLRA_TrimRangeToUse(raLivenessRange* range) if(range->list_accessLocations.empty()) { // special case where we trim ranges extending from other segments to a single instruction edge - cemu_assert_debug(!range->interval2.start.IsInstructionIndex() || !range->interval2.end.IsInstructionIndex()); - if(range->interval2.start.IsInstructionIndex()) - range->interval2.start = range->interval2.end; - if(range->interval2.end.IsInstructionIndex()) - range->interval2.end = range->interval2.start; + cemu_assert_debug(!range->interval.start.IsInstructionIndex() || !range->interval.end.IsInstructionIndex()); + 
if(range->interval.start.IsInstructionIndex()) + range->interval.start = range->interval.end; + if(range->interval.end.IsInstructionIndex()) + range->interval.end = range->interval.start; return; } // trim start and end - raInterval prevInterval = range->interval2; - if(range->interval2.start.IsInstructionIndex()) - range->interval2.start = range->list_accessLocations.front().pos; - if(range->interval2.end.IsInstructionIndex()) - range->interval2.end = range->list_accessLocations.back().pos; + raInterval prevInterval = range->interval; + if(range->interval.start.IsInstructionIndex()) + range->interval.start = range->list_accessLocations.front().pos; + if(range->interval.end.IsInstructionIndex()) + range->interval.end = range->list_accessLocations.back().pos; // extra checks #ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(range->interval2.start <= range->interval2.end); @@ -438,22 +438,20 @@ void IMLRA_TrimRangeToUse(raLivenessRange* range) // tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange // if head has a physical register assigned it will not carry over to tail // The return value is the tail range -// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within them -// the range after the split point does not inherit the physical register -// if trimToHole is true and any of the halfes is empty, it will be deleted -raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole) +// If trimToUsage is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within. If there are no locations then the range will be deleted +raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage) { cemu_assert_debug(splitPosition.IsInstructionIndex()); - cemu_assert_debug(!subrange->interval2.IsNextSegmentOnly() && !subrange->interval2.IsPreviousSegmentOnly()); - cemu_assert_debug(subrange->interval2.ContainsEdge(splitPosition)); + cemu_assert_debug(!subrange->interval.IsNextSegmentOnly() && !subrange->interval.IsPreviousSegmentOnly()); + cemu_assert_debug(subrange->interval.ContainsEdge(splitPosition)); // determine new intervals raInterval headInterval, tailInterval; - headInterval.SetInterval(subrange->interval2.start, splitPosition-1); - tailInterval.SetInterval(splitPosition, subrange->interval2.end); + headInterval.SetInterval(subrange->interval.start, splitPosition-1); + tailInterval.SetInterval(splitPosition, subrange->interval.end); cemu_assert_debug(headInterval.start <= headInterval.end); cemu_assert_debug(tailInterval.start <= tailInterval.end); // create tail - raLivenessRange* tailSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end); + raLivenessRange* tailSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end); tailSubrange->SetPhysicalRegister(subrange->GetPhysicalRegister()); // carry over branch targets and update reverse references tailSubrange->subrangeBranchTaken = subrange->subrangeBranchTaken; @@ -505,23 +503,23 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte } } 
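For reference, the interval arithmetic behind the head/tail split above boils down to a couple of lines. This is only an illustrative sketch: Interval is a hypothetical stand-in with plain inclusive integer edges, whereas the real raInterval/raInstructionEdge types additionally encode input/output edges and segment-crossing markers.

// Head keeps [start, splitPosition - 1], tail takes [splitPosition, end]; both edges inclusive.
struct Interval { int start; int end; };

struct SplitResult { Interval head; Interval tail; };

SplitResult SplitAt(const Interval& r, int splitPosition)
{
    // the caller guarantees start <= splitPosition - 1 and splitPosition <= end,
    // mirroring the cemu_assert_debug checks on headInterval and tailInterval above
    return { { r.start, splitPosition - 1 }, { splitPosition, r.end } };
}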
// adjust intervals - subrange->interval2 = headInterval; - tailSubrange->interval2 = tailInterval; + subrange->interval = headInterval; + tailSubrange->interval = tailInterval; // trim to hole - if(trimToHole) + if(trimToUsage) { - if(subrange->list_accessLocations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex())) + if(subrange->list_accessLocations.empty() && (subrange->interval.start.IsInstructionIndex() && subrange->interval.end.IsInstructionIndex())) { - PPCRecRA_deleteSubrange(ppcImlGenContext, subrange); + IMLRA_DeleteRange(ppcImlGenContext, subrange); subrange = nullptr; } else { IMLRA_TrimRangeToUse(subrange); } - if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex())) + if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval.start.IsInstructionIndex() && tailSubrange->interval.end.IsInstructionIndex())) { - PPCRecRA_deleteSubrange(ppcImlGenContext, tailSubrange); + IMLRA_DeleteRange(ppcImlGenContext, tailSubrange); tailSubrange = nullptr; } else @@ -530,11 +528,11 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte } } // validation - cemu_assert_debug(!subrange || subrange->interval2.start <= subrange->interval2.end); - cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start <= tailSubrange->interval2.end); - cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start >= splitPosition); - if (!trimToHole) - cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start == splitPosition); + cemu_assert_debug(!subrange || subrange->interval.start <= subrange->interval.end); + cemu_assert_debug(!tailSubrange || tailSubrange->interval.start <= tailSubrange->interval.end); + cemu_assert_debug(!tailSubrange || tailSubrange->interval.start >= splitPosition); + if (!trimToUsage) + cemu_assert_debug(!tailSubrange || tailSubrange->interval.start == splitPosition); if(subrange) PPCRecRA_debugValidateSubrange(subrange); @@ -560,8 +558,8 @@ sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange) if (subrange->list_accessLocations.empty()) continue; // this range would be deleted and thus has no cost sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); - bool hasAdditionalLoad = subrange->interval2.ExtendsPreviousSegment(); - bool hasAdditionalStore = subrange->interval2.ExtendsIntoNextSegment(); + bool hasAdditionalLoad = subrange->interval.ExtendsPreviousSegment(); + bool hasAdditionalStore = subrange->interval.ExtendsIntoNextSegment(); if(hasAdditionalLoad && subrange->list_accessLocations.front().IsWrite()) // if written before read then a load isn't necessary { cemu_assert_debug(!subrange->list_accessLocations.front().IsRead()); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index bc78a1155..b0685cc56 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -302,7 +302,7 @@ struct raFixedRegRequirement struct raLivenessRange { IMLSegment* imlSegment; - raInterval interval2; + raInterval interval; // dirty state tracking bool _noLoad; @@ -327,7 +327,7 @@ struct raLivenessRange IMLRegID virtualRegister; IMLName name; // register allocator result - sint32 physicalRegister; + IMLPhysReg physicalRegister; 
boost::container::small_vector GetAllSubrangesInCluster(); bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false in which case allowedRegisters is left undefined @@ -337,23 +337,23 @@ struct raLivenessRange sint32 GetPhysicalRegister() const; bool HasPhysicalRegister() const { return physicalRegister >= 0; } IMLName GetName() const; - void SetPhysicalRegister(sint32 physicalRegister); - void SetPhysicalRegisterForCluster(sint32 physicalRegister); + void SetPhysicalRegister(IMLPhysReg physicalRegister); + void SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister); void UnsetPhysicalRegister() { physicalRegister = -1; } private: void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs); }; -raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition); -void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange); +raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition); +void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange); void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange); +void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange); -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange); +void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange); -raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole = false); +raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage = false); void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 733c6e5a5..762647170 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -19,6 +19,7 @@ #include "util/highresolutiontimer/HighResolutionTimer.h" #define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock +#define PPCREC_LOG_RECOMPILATION_RESULTS 0 struct PPCInvalidationRange { @@ -185,8 +186,10 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; +#if PPCREC_LOG_RECOMPILATION_RESULTS BenchmarkTimer bt; bt.Start(); +#endif // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; @@ -217,18 +220,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } - // if (ppcRecFunc->ppcAddress == 0x2BDA9F4) - // { - // IMLDebug_Dump(&ppcImlGenContext); - // 
__debugbreak(); - // } - - // Functions for testing (botw): - // 3B4049C (large with switch case) - // 30BF118 (has a bndz copy loop + some float instructions at the end) - - - // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); if (x64GenerationSuccess == false) @@ -258,18 +249,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } +#if PPCREC_LOG_RECOMPILATION_RESULTS bt.Stop(); - - //cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); - uint32 codeHash = 0; for (uint32 i = 0; i < ppcRecFunc->x86Size; i++) { codeHash = _rotr(codeHash, 3); codeHash += ((uint8*)ppcRecFunc->x86Code)[i]; } - cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash); +#endif return ppcRecFunc; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index cf25128b6..9b74e45b8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -3008,7 +3008,6 @@ void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext break; case PPCREC_IML_MACRO_DEBUGBREAK: case PPCREC_IML_MACRO_COUNT_CYCLES: - case PPCREC_IML_MACRO_MFTB: break; default: cemu_assert_unimplemented(); From 099d1d4e1f08f3afcfd1b36ba50d15feac6964be Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 28 Oct 2024 09:21:42 +0100 Subject: [PATCH 59/64] PPCRec: Rework RLWIMI --- .../Recompiler/BackendX64/BackendX64.cpp | 20 -------------- .../Recompiler/IML/IMLInstruction.cpp | 15 ++--------- .../Espresso/Recompiler/IML/IMLInstruction.h | 1 - .../IML/IMLRegisterAllocatorRanges.cpp | 22 ++++++++-------- .../Recompiler/PPCRecompilerImlGen.cpp | 26 ++++++++++++------- 5 files changed, 29 insertions(+), 55 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 5ef713b9b..f60ac2b8b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -1073,26 +1073,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction else // XOR x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) - { - // registerResult = ((registerResult<<op_r_r_s32.immS32; - uint32 mb = (vImm>>0)&0xFF; - uint32 me = (vImm>>8)&0xFF; - uint32 sh = (vImm>>16)&0xFF; - uint32 mask = ppc_mask(mb, me); - // copy rS to temporary register - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regA); - // rotate destination register - if( sh ) - x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); - // AND destination register with inverted mask - x64Gen_and_reg64Low32_imm32(x64GenContext, regR, ~mask); - // AND temporary rS register with mask - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask); - // OR result with temporary - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, regR, 
REG_RESV_TEMP); - } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand * immS32 diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 665480277..480b0d8dc 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -93,19 +93,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_R_R_S32) { - if (operation == PPCREC_IML_OP_RLWIMI) - { - // result and operand register are both read, result is written - registersUsed->writtenGPR1 = op_r_r_s32.regR; - registersUsed->readGPR1 = op_r_r_s32.regR; - registersUsed->readGPR2 = op_r_r_s32.regA; - } - else - { - // result is write only and operand is read only - registersUsed->writtenGPR1 = op_r_r_s32.regR; - registersUsed->readGPR1 = op_r_r_s32.regA; - } + registersUsed->writtenGPR1 = op_r_r_s32.regR; + registersUsed->readGPR1 = op_r_r_s32.regA; } else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index e58511c1b..2cd1d642d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -122,7 +122,6 @@ enum PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned) PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed) // ppc - PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask) PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 1ac884cd8..583d5905b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -290,7 +290,7 @@ void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* PPCRecRA_debugValidateSubrange(absorbedSubrange); if (subrange->imlSegment != absorbedSubrange->imlSegment) assert_dbg(); - cemu_assert_debug(subrange->interval2.end == absorbedSubrange->interval2.start); + cemu_assert_debug(subrange->interval.end == absorbedSubrange->interval.start); if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken) assert_dbg(); @@ -375,23 +375,23 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range) if(range->subrangeBranchTaken || range->subrangeBranchNotTaken) { - cemu_assert_debug(range->interval2.end.ConnectsToNextSegment()); + cemu_assert_debug(range->interval.end.ConnectsToNextSegment()); } if(!range->previousRanges.empty()) { - cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment()); + cemu_assert_debug(range->interval.start.ConnectsToPreviousSegment()); } // validate locations if (!range->list_accessLocations.empty()) { - cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval2.start); - cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval2.end); + cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval.start); + cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval.end); } // validate fixed reg requirements if (!range->list_fixedRegRequirements.empty()) { - 
cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval2.start); - cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval2.end); + cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval.start); + cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval.end); for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++) cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos); } @@ -423,12 +423,12 @@ void IMLRA_TrimRangeToUse(raLivenessRange* range) range->interval.end = range->list_accessLocations.back().pos; // extra checks #ifdef CEMU_DEBUG_ASSERT - cemu_assert_debug(range->interval2.start <= range->interval2.end); + cemu_assert_debug(range->interval.start <= range->interval.end); for(auto& loc : range->list_accessLocations) { - cemu_assert_debug(range->interval2.ContainsEdge(loc.pos)); + cemu_assert_debug(range->interval.ContainsEdge(loc.pos)); } - cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2)); + cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval)); #endif } @@ -580,7 +580,7 @@ sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInst { // validation #ifdef CEMU_DEBUG_ASSERT - if (subrange->interval2.ExtendsIntoNextSegment()) + if (subrange->interval.ExtendsIntoNextSegment()) assert_dbg(); #endif cemu_assert_debug(splitPosition.IsInstructionIndex()); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 9b74e45b8..55d4a94b7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -982,12 +982,12 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - int rS, rA, SH, MB, ME; + sint32 rS, rA, SH, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); uint32 mask = ppc_mask(MB, ME); IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); if( ME == (31-SH) && MB == 0 ) { // SLWI @@ -1015,16 +1015,22 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - int rS, rA, SH, MB, ME; + sint32 rS, rA, SH, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); - - IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // pack RLWIMI parameters into single integer - uint32 vImm = MB|(ME<<8)|(SH<<16); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, regA, regS, (sint32)vImm); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regR = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + uint32 mask = ppc_mask(MB, ME); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regTmp, regS); + if (SH) + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regTmp, SH); + if (mask != 0) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regR, regR, (sint32)~mask); + if (mask != 0xFFFFFFFF) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, 
regTmp, (sint32)mask); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regR, regR, regTmp); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, regA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regR); return true; } From e33272651cbe9b7fd38284f0ac5e6b1c0e36c5cc Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 28 Oct 2024 23:09:56 +0100 Subject: [PATCH 60/64] PPCRec: Optimizations --- boost.natvis | 12 ++ .../Recompiler/BackendX64/BackendX64.cpp | 128 +++++++++--------- .../Recompiler/BackendX64/BackendX64.h | 9 ++ .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 17 --- .../Recompiler/IML/IMLInstruction.cpp | 23 +--- .../Espresso/Recompiler/IML/IMLInstruction.h | 3 - .../Recompiler/PPCRecompilerImlGen.cpp | 18 --- src/gui/MainWindow.cpp | 2 +- 8 files changed, 90 insertions(+), 122 deletions(-) diff --git a/boost.natvis b/boost.natvis index cee3e3d54..2781a5859 100644 --- a/boost.natvis +++ b/boost.natvis @@ -11,4 +11,16 @@ + + {{ size={m_holder.m_size} }} + + m_holder.m_size + static_capacity + + m_holder.m_size + ($T1*)m_holder.storage.data + + + + diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index f60ac2b8b..9c0b44165 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -7,6 +7,7 @@ #include "Cafe/OS/libs/coreinit/coreinit_Time.h" #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" +#include static x86Assembler64::GPR32 _reg32(IMLReg physReg) { @@ -658,29 +659,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, return true; } -bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - cemu_assert_unimplemented(); - //if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - //{ - // // registerResult = immS32 (conditional) - // if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - // { - // assert_dbg(); - // } - - // x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); - // uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; - // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - // if (imlInstruction->op_conditional_r_s32.bitMustBeSet) - // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - // else - // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - // return true; - //} - return false; -} - bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR); @@ -973,47 +951,71 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunc return true; } -bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +bool PPCRecompilerX64Gen_IsSameCompare(IMLInstruction* 
imlInstructionA, IMLInstruction* imlInstructionB) { - auto regR = _reg8(imlInstruction->op_compare.regR); - auto regA = _reg32(imlInstruction->op_compare.regA); - auto regB = _reg32(imlInstruction->op_compare.regB); - X86Cond cond = _x86Cond(imlInstruction->op_compare.cond); - bool keepR = regR == regA || regR == regB; - if(!keepR) - { - x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc - x64GenContext->emitter->CMP_dd(regA, regB); - x64GenContext->emitter->SETcc_b(cond, regR); - } - else - { - x64GenContext->emitter->CMP_dd(regA, regB); - x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0); - x64GenContext->emitter->SETcc_b(cond, regR); - } - return true; + if(imlInstructionA->type != imlInstructionB->type) + return false; + if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE) + return imlInstructionA->op_compare.regA == imlInstructionB->op_compare.regA && imlInstructionA->op_compare.regB == imlInstructionB->op_compare.regB; + else if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE_S32) + return imlInstructionA->op_compare_s32.regA == imlInstructionB->op_compare_s32.regA && imlInstructionA->op_compare_s32.immS32 == imlInstructionB->op_compare_s32.immS32; + return false; } -bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +bool PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, sint32& extraInstructionsProcessed) { - auto regR = _reg8(imlInstruction->op_compare_s32.regR); - auto regA = _reg32(imlInstruction->op_compare_s32.regA); - sint32 imm = imlInstruction->op_compare_s32.immS32; - X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond); - bool keepR = regR == regA; - if(!keepR) - { - x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc - x64GenContext->emitter->CMP_di32(regA, imm); - x64GenContext->emitter->SETcc_b(cond, regR); + extraInstructionsProcessed = 0; + boost::container::static_vector compareInstructions; + compareInstructions.push_back(imlInstruction); + for(sint32 i=1; i<4; i++) + { + IMLInstruction* nextIns = x64GenContext->GetNextInstruction(i); + if(!nextIns || !PPCRecompilerX64Gen_IsSameCompare(imlInstruction, nextIns)) + break; + compareInstructions.push_back(nextIns); + } + auto OperandOverlapsWithR = [&](IMLInstruction* ins) -> bool + { + if(ins->type == PPCREC_IML_TYPE_COMPARE) + return _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regA) || _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regB); + else if(ins->type == PPCREC_IML_TYPE_COMPARE_S32) + return _reg32_from_reg8(_reg8(ins->op_compare_s32.regR)) == _reg32(ins->op_compare_s32.regA); + }; + auto GetRegR = [](IMLInstruction* insn) + { + return insn->type == PPCREC_IML_TYPE_COMPARE ? 
_reg32_from_reg8(_reg8(insn->op_compare.regR)) : _reg32_from_reg8(_reg8(insn->op_compare_s32.regR)); + }; + // prefer XOR method for zeroing out registers if possible + for(auto& it : compareInstructions) + { + if(OperandOverlapsWithR(it)) + continue; + auto regR = GetRegR(it); + x64GenContext->emitter->XOR_dd(regR, regR); // zero bytes unaffected by SETcc + } + // emit the compare instruction + if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE) + { + auto regA = _reg32(imlInstruction->op_compare.regA); + auto regB = _reg32(imlInstruction->op_compare.regB); + x64GenContext->emitter->CMP_dd(regA, regB); } - else + else if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) { + auto regA = _reg32(imlInstruction->op_compare_s32.regA); + sint32 imm = imlInstruction->op_compare_s32.immS32; x64GenContext->emitter->CMP_di32(regA, imm); - x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0); + } + // emit the SETcc instructions + for(auto& it : compareInstructions) + { + auto regR = _reg8(it->op_compare.regR); + X86Cond cond = _x86Cond(it->op_compare.cond); + if(OperandOverlapsWithR(it)) + x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0); x64GenContext->emitter->SETcc_b(cond, regR); } + extraInstructionsProcessed = (sint32)compareInstructions.size() - 1; return true; } @@ -1383,6 +1385,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo segIt->x64Offset = x64GenContext.emitter->GetWriteIndex(); for(size_t i=0; iimlList.size(); i++) { + x64GenContext.m_currentInstructionEmitIndex = i; IMLInstruction* imlInstruction = segIt->imlList.data() + i; if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) @@ -1403,11 +1406,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) codeGenerationFailed = true; } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - if (PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - codeGenerationFailed = true; - } else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) { if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) @@ -1428,13 +1426,11 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) codeGenerationFailed = true; } - else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE) - { - PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) { - PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + sint32 extraInstructionsProcessed; + PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, extraInstructionsProcessed); + i += extraInstructionsProcessed; } else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 1a0fffec5..a66469809 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -15,6 +15,7 @@ struct x64GenContext_t { IMLSegment* currentSegment{}; x86Assembler64* emitter; + sint32 m_currentInstructionEmitIndex; x64GenContext_t() { @@ -26,6 +27,14 @@ struct x64GenContext_t delete emitter; } + IMLInstruction* GetNextInstruction(sint32 relativeIndex = 1) + { + sint32 index = m_currentInstructionEmitIndex + relativeIndex; + if(index < 0 || index >= (sint32)currentSegment->imlList.size()) + return nullptr; + return currentSegment->imlList.data() + index; + } + // relocate offsets std::vector relocateOffsetTable2; }; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 1cfb470de..07fd4002d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -424,23 +424,6 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di { strOutput.addFmt("CYCLE_CHECK"); } - else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR)); - bool displayAsHex = false; - if (inst.operation == PPCREC_IML_OP_ASSIGN) - { - displayAsHex = true; - strOutput.add("="); - } - else - strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); - if (displayAsHex) - strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); - else - strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); - strOutput.add(" (conditional)"); - } else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) { strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond)); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 480b0d8dc..cb4810431 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -80,17 +80,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->writtenGPR1 = op_r_immS32.regR; } } - else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - if (operation == PPCREC_IML_OP_ASSIGN) - { - // result is written, but also considered read (in case the condition is false the input is preserved) - registersUsed->readGPR1 = op_conditional_r_s32.regR; - registersUsed->writtenGPR1 = op_conditional_r_s32.regR; - } - else - cemu_assert_unimplemented(); - } else if (type == PPCREC_IML_TYPE_R_R_S32) { registersUsed->writtenGPR1 = op_r_r_s32.regR; @@ -117,9 +106,13 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if (type == PPCREC_IML_TYPE_R_R_R) { // in all cases result is written and other operands are read only + // with the exception of XOR, where if regA == regB then all bits are zeroed out. 
So we don't consider it a read registersUsed->writtenGPR1 = op_r_r_r.regR; - registersUsed->readGPR1 = op_r_r_r.regA; - registersUsed->readGPR2 = op_r_r_r.regB; + if(!(operation == PPCREC_IML_OP_XOR && op_r_r_r.regA == op_r_r_r.regB)) + { + registersUsed->readGPR1 = op_r_r_r.regA; + registersUsed->readGPR2 = op_r_r_r.regB; + } } else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) { @@ -502,10 +495,6 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr { op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable); } - else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - op_conditional_r_s32.regR = replaceRegisterIdMultiple(op_conditional_r_s32.regR, translationTable); - } else if (type == PPCREC_IML_TYPE_R_R_S32) { op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 2cd1d642d..3ba0a1aff 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -250,9 +250,6 @@ enum // atomic PPCREC_IML_TYPE_ATOMIC_CMP_STORE, - // conditional (legacy) - PPCREC_IML_TYPE_CONDITIONAL_R_S32, - // function call PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 55d4a94b7..5ea424d3c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -54,23 +54,6 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext return &inst; } -void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - else - memset(imlInstruction, 0, sizeof(IMLInstruction)); - imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32; - imlInstruction->operation = operation; - // r_s32 operation - imlInstruction->op_conditional_r_s32.regR = registerIndex; - imlInstruction->op_conditional_r_s32.immS32 = immS32; - // condition - imlInstruction->op_conditional_r_s32.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditional_r_s32.crBitIndex = crBitIndex; - imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet; -} - void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { cemu_assert_debug(registerMemory1.IsValid()); @@ -559,7 +542,6 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); - // decodeOp_BC(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK) Espresso::BOField boField(BO); uint32 crRegister = BI/4; diff --git a/src/gui/MainWindow.cpp b/src/gui/MainWindow.cpp index e38cb6177..edc82276a 100644 --- a/src/gui/MainWindow.cpp +++ b/src/gui/MainWindow.cpp @@ -2225,7 +2225,7 @@ void MainWindow::RecreateMenu() wxMenu* debugDumpMenu = new wxMenu; debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_TEXTURES, _("&Textures"), 
wxEmptyString)->Check(ActiveSettings::DumpTexturesEnabled()); debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_SHADERS, _("&Shaders"), wxEmptyString)->Check(ActiveSettings::DumpShadersEnabled()); - debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS, _("&Recompiler functions"), wxEmptyString)->Check(ActiveSettings::DumpRecompilerFunctionsEnabled()); + debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS, _("&Recompiled functions"), wxEmptyString)->Check(ActiveSettings::DumpRecompilerFunctionsEnabled()); debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_CURL_REQUESTS, _("&nlibcurl HTTP/HTTPS requests"), wxEmptyString); // debug submenu wxMenu* debugMenu = new wxMenu(); From a05b6558c489c732b66484226a4ee507e1b9ca50 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 29 Oct 2024 12:38:27 +0100 Subject: [PATCH 61/64] PPCRec: Handle edge case for x86 shift instructions --- .../Espresso/Recompiler/BackendX64/BackendX64.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 9c0b44165..d3750543d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -828,9 +828,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else { - cemu_assert_debug(rRegResult != rRegOperand2); - cemu_assert_debug(rRegResult != X86_REG_RCX); - cemu_assert_debug(rRegOperand2 == X86_REG_RCX); + cemu_assert_debug(rRegOperand2 == X86_REG_ECX); + bool useTempReg = rRegResult == X86_REG_ECX && rRegOperand1 != X86_REG_ECX; + auto origRegResult = rRegResult; + if(useTempReg) + { + x64GenContext->emitter->MOV_dd(REG_RESV_TEMP, rRegOperand1); + rRegResult = REG_RESV_TEMP; + } if(rRegOperand1 != rRegResult) x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) @@ -839,6 +844,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64GenContext->emitter->SHR_d_CL(rRegResult); else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) x64GenContext->emitter->SHL_d_CL(rRegResult); + if(useTempReg) + x64GenContext->emitter->MOV_dd(origRegResult, REG_RESV_TEMP); } } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) From 83569ae66b69c547af5d9ec53e05c2979d142c07 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 30 Oct 2024 02:41:43 +0100 Subject: [PATCH 62/64] PPCRec: Avoid relying on undefined behavior in std::copy_backwards --- src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index cb61fecf2..f2cf173a6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -611,7 +611,8 @@ sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 ta { cemu_assert_debug(targetIndex > 0); targetIndex--; - std::copy_backward(seg.imlList.begin() + initialIndex + 1, seg.imlList.begin() + targetIndex + 1, seg.imlList.begin() + targetIndex); + for(size_t i=initialIndex; i Date: Wed, 30 Oct 
2024 03:49:42 +0100 Subject: [PATCH 63/64] PPCRec: Fix stack pointer alignment for calls --- .../HW/Espresso/Recompiler/BackendX64/BackendX64.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index d3750543d..de9511949 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -177,6 +177,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, { if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) { + //x64Gen_int3(x64GenContext); uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg); if(X86_REG_RDX != branchDstReg) x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg); @@ -553,10 +554,12 @@ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { // the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here - x64GenContext->emitter->SUB_qi8(X86_REG_RSP, 0x28); // reserve enough space for any parameters while keeping stack alignment of 16 intact + x64GenContext->emitter->SUB_qi8(X86_REG_RSP, 0x20); // reserve enough space for any parameters while keeping stack alignment of 16 intact x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress); x64GenContext->emitter->CALL_q(X86_REG_RAX); - x64GenContext->emitter->ADD_qi8(X86_REG_RSP, 0x28); + x64GenContext->emitter->ADD_qi8(X86_REG_RSP, 0x20); + // a note about the stack pointer: + // currently the code generated by generateEnterRecompilerCode makes sure the stack is 16 byte aligned, so we don't need to fix it up here } bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) @@ -1623,7 +1626,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() { x64GenContext_t x64GenContext{}; - // start of recompiler entry function + // start of recompiler entry function (15 regs) x64Gen_push_reg64(&x64GenContext, X86_REG_RAX); x64Gen_push_reg64(&x64GenContext, X86_REG_RCX); x64Gen_push_reg64(&x64GenContext, X86_REG_RDX); From 9187044cadda452fd2b19036a0f626c8cc5fc6e0 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 30 Oct 2024 17:25:02 +0100 Subject: [PATCH 64/64] PPCRec: Use named register constants instead of hardcoding regs --- .../Recompiler/BackendX64/BackendX64.cpp | 205 +++++++----------- .../Recompiler/BackendX64/BackendX64.h | 3 - .../Recompiler/BackendX64/BackendX64FPU.cpp | 61 +++--- 3 files changed, 103 insertions(+), 166 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index de9511949..6a8aac2b9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -194,7 +194,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, { // MOV DWORD [SPR_LinkRegister], newLR uint32 newLR = imlInstruction->op_macro.param + 4; - x64Gen_mov_mem32Reg64_imm32(x64GenContext, 
X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), newLR); + x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), newLR); // remember new instruction pointer in RDX uint32 newIP = imlInstruction->op_macro.param2; x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP); @@ -267,26 +267,20 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES ) { uint32 cycleCount = imlInstruction->op_macro.param; - x64Gen_sub_mem32reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), cycleCount); + x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), cycleCount); return true; } else if( imlInstruction->operation == PPCREC_IML_MACRO_HLE ) { uint32 ppcAddress = imlInstruction->op_macro.param; uint32 funcId = imlInstruction->op_macro.param2; - //x64Gen_int3(x64GenContext); // update instruction pointer - x64Gen_mov_mem32Reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); - //// save hCPU (RSP) - //x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)&ppcRecompilerX64_hCPUTemp); - //x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_TEMP, 0, REG_RSP); + x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); // set parameters - x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, X86_REG_RSP); + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, REG_RESV_HCPU); x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, funcId); - // restore stackpointer from executionContext/hCPU->rspTemp + // restore stackpointer from hCPU->rspTemp x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); - //x64Emit_mov_reg64_mem64(x64GenContext, REG_RSP, REG_R14, 0); - //x64Gen_int3(x64GenContext); // reserve space on stack for call parameters x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11); // must be uneven number in order to retain stack 0x10 alignment x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0); @@ -294,38 +288,33 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_virtualHLE); x64Gen_call_reg64(x64GenContext, X86_REG_RAX); // restore RSP to hCPU (from RAX, result of PPCRecompiler_virtualHLE) - //x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)&ppcRecompilerX64_hCPUTemp); - //x64Emit_mov_reg64_mem64Reg64(x64GenContext, REG_RSP, REG_RESV_TEMP, 0); - x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RSP, X86_REG_RAX); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_HCPU, X86_REG_RAX); // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R15, (uint64)ppcRecompilerInstanceData); + x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData); // MOV R13, memory_base - x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R13, (uint64)memory_base); + x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base); // check if cycles where decreased beyond zero, if yes -> leave recompiler - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative + x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative sint32 jumpInstructionOffset1 = 
x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0); - //x64Gen_int3(x64GenContext); - //x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, ppcAddress); - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RDX, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RDX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer)); // set EAX to 0 (we assume that ppcRecompilerDirectJumpTable[0] will be a recompiler escape function) x64Gen_xor_reg32_reg32(x64GenContext, X86_REG_RAX, X86_REG_RAX); - // ADD RAX, R15 (R15 -> Pointer to ppcRecompilerInstanceData - x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_R15); - //// JMP [recompilerCallTable+EAX/4*8] - //x64Gen_int3(x64GenContext); + // ADD RAX, REG_RESV_RECDATA + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA); + // JMP [recompilerCallTable+EAX/4*8] x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // check if instruction pointer was changed // assign new instruction pointer to EAX - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer)); // remember instruction pointer in REG_EDX x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, X86_REG_RAX); // EAX *= 2 x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_RAX); - // ADD RAX, R15 (R15 -> Pointer to ppcRecompilerInstanceData - x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_R15); + // ADD RAX, REG_RESV_RECDATA + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA); // JMP [ppcRecompilerDirectJumpTable+RAX/4*8] x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); return true; @@ -360,18 +349,14 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p if( indexed && realRegisterData == realRegisterMem2 ) { // for indexed memory access realRegisterData must not be the same register as the second memory register, - // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 - sint32 temp = realRegisterMem; - realRegisterMem = realRegisterMem2; - realRegisterMem2 = temp; + // this can easily be worked around by swapping realRegisterMem and realRegisterMem2 + std::swap(realRegisterMem, realRegisterMem2); } bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian; if( imlInstruction->op_storeLoad.copyWidth == 32 ) { - //if( indexed ) - // PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if (indexed) { x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2); @@ -380,28 +365,24 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p { if (indexed) { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - //if (indexed && realRegisterMem != realRegisterData) - // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, 
realRegisterMem2); + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); } else { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); } } else { if (indexed) { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - //if (realRegisterMem != realRegisterData) - // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); if (switchEndian) x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); } else { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); if (switchEndian) x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); } @@ -415,13 +396,13 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } if(g_CPUFeatures.x86.movbe && switchEndian ) { - x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } else { - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( switchEndian ) @@ -437,9 +418,9 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( signExtend ) - x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); else - x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); + x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } @@ -471,10 +452,8 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed && realRegisterData == realRegisterMem2) { // for indexed memory access 
realRegisterData must not be the same register as the second memory register, - // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 - sint32 temp = realRegisterMem; - realRegisterMem = realRegisterMem2; - realRegisterMem2 = temp; + // this can easily be worked around by swapping realRegisterMem and realRegisterMem2 + std::swap(realRegisterMem, realRegisterMem2); } bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; @@ -496,9 +475,9 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if (g_CPUFeatures.x86.movbe && swapEndian) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); if (indexed) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } @@ -509,7 +488,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); if (indexed) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); if (indexed) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // todo: Optimize this, e.g. 
by using MOVBE @@ -853,8 +832,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); // mov operand 2 to temp register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); // mov operand1 to EAX @@ -877,14 +856,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EAX); // restore EAX / EDX if( rRegResult != X86_REG_RAX ) - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); if( rRegResult != X86_REG_RDX ) - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); // mov operand 2 to temp register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); // mov operand1 to EAX @@ -909,9 +888,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EDX); // restore EAX / EDX if( rRegResult != X86_REG_RAX ) - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); if( rRegResult != X86_REG_RDX ) - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); } else { @@ -986,9 +965,10 @@ bool PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction_t* PPCRecFuncti } auto OperandOverlapsWithR = [&](IMLInstruction* ins) -> bool { + cemu_assert_debug(ins->type == PPCREC_IML_TYPE_COMPARE || ins->type == PPCREC_IML_TYPE_COMPARE_S32); if(ins->type == PPCREC_IML_TYPE_COMPARE) return _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regA) || _reg32_from_reg8(_reg8(ins->op_compare.regR)) 
== _reg32(ins->op_compare.regB); - else if(ins->type == PPCREC_IML_TYPE_COMPARE_S32) + else /* PPCREC_IML_TYPE_COMPARE_S32 */ return _reg32_from_reg8(_reg8(ins->op_compare_s32.regR)) == _reg32(ins->op_compare_s32.regA); }; auto GetRegR = [](IMLInstruction* insn) @@ -1160,7 +1140,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC // BT - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative + x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken()); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); @@ -1175,48 +1155,48 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, auto regR = _reg64(imlInstruction->op_r_name.regR); if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)); } else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) { sint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR)); else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR)); else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER)); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, memOffset); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); } else if (name == PPCREC_NAME_XER_CA) { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca)); } else if (name == PPCREC_NAME_XER_SO) { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so)); } else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, 
X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
+		x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
 	}
 	else if (name == PPCREC_NAME_CPU_MEMRES_EA)
 	{
-		x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr));
+		x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
 	}
 	else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
 	{
-		x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue));
+		x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue));
 	}
 	else
 		assert_dbg();
@@ -1226,11 +1206,11 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
 		auto regR = _regF64(imlInstruction->op_r_name.regR);
 		if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
 		{
-			x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
+			x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
 		}
 		else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
 		{
-			x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
+			x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
 		}
 		else
 		{
@@ -1251,48 +1231,48 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
 		auto regR = _reg64(imlInstruction->op_r_name.regR);
 		if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
 		{
-			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR);
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR);
 		}
 		else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
 		{
 			uint32 sprIndex = (name - PPCREC_NAME_SPR0);
 			if (sprIndex == SPR_LR)
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), regR);
 			else if (sprIndex == SPR_CTR)
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR), regR);
 			else if (sprIndex == SPR_XER)
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER), regR);
 			else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
 			{
 				sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, memOffset, regR);
 			}
 			else
 				assert_dbg();
 		}
 		else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
 		{
-			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR);
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR);
 		}
 		else if (name == PPCREC_NAME_XER_CA)
 		{
-			x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR));
+			x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR));
 		}
 		else if (name == PPCREC_NAME_XER_SO)
 		{
-			x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR));
+			x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR));
 		}
 		else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
 		{
-			x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR));
+			x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR));
 		}
 		else if (name == PPCREC_NAME_CPU_MEMRES_EA)
 		{
-			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR);
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr), regR);
 		}
 		else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
 		{
-			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR);
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue), regR);
 		}
 		else
 			assert_dbg();
@@ -1303,11 +1283,11 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
 		uint32 name = imlInstruction->op_r_name.name;
 		if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
 		{
-			x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
+			x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
 		}
 		else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
 		{
-			x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
+			x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
 		}
 		else
 		{
@@ -1320,42 +1300,6 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
 	}
 
-//void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-//{
-//	uint32 name = imlInstruction->op_r_name.name;
-//	uint32 fprReg = _regF64(imlInstruction->op_r_name.regR);
-//	if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
-//	{
-//		x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
-//	}
-//	else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
-//	{
-//		x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
-//	}
-//	else
-//	{
-//		cemu_assert_debug(false);
-//	}
-//}
-//
-//void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-//{
-//	uint32 name = imlInstruction->op_r_name.name;
-//	uint32 fprReg = _regF64(imlInstruction->op_r_name.regR);
-//	if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
-//	{
-//		x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
-//	}
-//	else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
-//	{
-//		x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
-//	}
-//	else
-//	{
-//		cemu_assert_debug(false);
-//	}
-//}
-
 uint8* codeMemoryBlock = nullptr;
 sint32 codeMemoryBlockIndex = 0;
 sint32 codeMemoryBlockSize = 0;
@@ -1658,13 +1602,12 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode()
 	x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP
 	x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP);
-	// MOV RSP, RDX (ppc interpreter instance)
-	x64Gen_mov_reg64_reg64(&x64GenContext, X86_REG_RSP, X86_REG_RDX);
+	x64Gen_mov_reg64_reg64(&x64GenContext, REG_RESV_HCPU, X86_REG_RDX);
 	// MOV R15, ppcRecompilerInstanceData
-	x64Gen_mov_reg64_imm64(&x64GenContext, X86_REG_R15, (uint64)ppcRecompilerInstanceData);
+	x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData);
 	// MOV R13, memory_base
-	x64Gen_mov_reg64_imm64(&x64GenContext, X86_REG_R13, (uint64)memory_base);
+	x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base);
 	//JMP recFunc
 	x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1
@@ -1703,11 +1646,9 @@ void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
 	// update instruction pointer
 	// LR is in EDX
-	x64Emit_mov_mem32_reg32(&x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), X86_REG_EDX);
-
-	// MOV RSP, [ppcRecompilerX64_rspTemp]
+	x64Emit_mov_mem32_reg32(&x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), X86_REG_EDX);
+	// MOV RSP, [hCPU->rspTemp]
 	x64Emit_mov_reg64_mem64(&x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp));
-
 	// RET
 	x64Gen_ret(&x64GenContext);
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
index a66469809..e4d1f5a91 100644
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
@@ -71,9 +71,6 @@ enum
 	X86_CONDITION_NONE, // no condition, jump always
 };
 
-#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
-#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register
-
 bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
 
 void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);
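The hunks above and below replace hard-coded host registers (RSP, R13, R15) at each emission site with the backend's reserved-register aliases. As a rough sketch of what those aliases stand for, inferred only from the operands being swapped in this patch (the real definitions live in the x64 backend headers and are not reproduced in this diff):

// Illustrative only: reserved host registers the recompiled code treats as fixed.
// The numeric encodings are standard x86-64 register numbers, not copied from Cemu.
enum X86Reg : int { X86_REG_RSP = 4, X86_REG_R13 = 13, X86_REG_R15 = 15 };

constexpr X86Reg REG_RESV_HCPU    = X86_REG_RSP; // pointer to the PPCInterpreter_t instance (hCPU)
constexpr X86Reg REG_RESV_MEMBASE = X86_REG_R13; // base address of emulated PPC memory
constexpr X86Reg REG_RESV_RECDATA = X86_REG_R15; // pointer to ppcRecompilerInstanceData

With the aliases in place, later patches can change which host register backs each role in one spot instead of editing every emitter call.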
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
index af19cae96..4d9a538df 100644
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
@@ -70,7 +70,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
 			assert_dbg();
 		}
 		// optimized code for ps float load
-		x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memImmS32);
+		x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
 		x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
 		x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD
 		x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP);
@@ -111,8 +111,8 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
 		}
 		else
 		{
-			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP);
-			x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP);
+			x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
 		}
 		x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP);
 		// load constant 1.0 into lower half and upper half of temp register
@@ -174,7 +174,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
 		if (readSize == 16)
 		{
 			// half word
-			x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memOffset);
+			x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset);
 			x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // endian swap
 			if (isSigned)
 				x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
@@ -184,7 +184,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
 		else if (readSize == 8)
 		{
 			// byte
-			x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memOffset);
+			x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset);
 			if (isSigned)
 				x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
 			else
@@ -312,14 +312,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
 			x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
 			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
 			// load value
-			x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
+			x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
 			x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
 			x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
 			x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
 		}
 		else
 		{
-			x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
+			x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
 			x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
 			x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
 			x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
@@ -333,31 +333,31 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
 				x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
 				x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
 				// load double low part to temporaryFPR
-				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
+				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
 				x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
 				// calculate offset again
 				x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
 				x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
 				// load double high part to temporaryFPR
-				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4);
+				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4);
 				x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
 				// load double from temporaryFPR
-				x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
+				x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
 			}
 			else
 			{
 				// load double low part to temporaryFPR
-				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
+				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
 				x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
 				// load double high part to temporaryFPR
-				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4);
+				x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4);
 				x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
-				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
 				// load double from temporaryFPR
-				x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
+				x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
 			}
 		}
 	}
@@ -416,9 +416,9 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
 			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
 		}
 		if (g_CPUFeatures.x86.movbe)
-			x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, memReg, memImmS32, REG_RESV_TEMP);
+			x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP);
 		else
-			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, memReg, memImmS32, REG_RESV_TEMP);
+			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP);
 		if (indexed)
 		{
 			x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
@@ -433,7 +433,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
 		x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
 		x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD
 		x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
-		x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, X86_REG_R13, memReg, memImmS32);
+		x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
 		return;
 	}
 	// store as integer
@@ -599,9 +599,9 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
 			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
 		}
 		if(g_CPUFeatures.x86.movbe)
-			x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+			x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
 		else
-			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
 		if( indexed )
 		{
 			x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
@@ -615,15 +615,15 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
 				assert_dbg();
 			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
 		}
-		x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
+		x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
 		// store double low part
-		x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0);
+		x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0);
 		x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
-		x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP);
+		x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP);
 		// store double high part
-		x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4);
+		x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4);
 		x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
-		x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP);
+		x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP);
 		if( indexed )
 		{
 			x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
@@ -635,15 +635,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
 		x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
 		if( indexed )
 		{
-			if( realRegisterMem == realRegisterMem2 )
-				assert_dbg();
+			cemu_assert_debug(realRegisterMem != realRegisterMem2);
 			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
-			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
 			x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
 		}
 		else
 		{
-			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
 		}
 	}
 	else if(mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 ||