diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index f5de40823259e..fab544c0b95e3 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -770,9 +770,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) GenTree* source = treeNode->gtGetOp1(); - bool isStruct = source->TypeIs(TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); - - if (!isStruct) // a normal non-Struct argument + if (!source->TypeIs(TYP_STRUCT)) // a normal non-Struct argument { if (varTypeIsSIMD(source->TypeGet())) { @@ -864,9 +862,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { genPutArgStkFieldList(treeNode, varNumOut); } - else // We must have a GT_OBJ or a GT_LCL_VAR + else { - noway_assert(source->OperIs(GT_LCL_VAR, GT_OBJ)); + noway_assert(source->OperIsLocalRead() || source->OperIs(GT_OBJ)); var_types targetType = source->TypeGet(); noway_assert(varTypeIsStruct(targetType)); @@ -879,96 +877,42 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) #ifdef TARGET_ARM64 regNumber hiReg = treeNode->GetSingleTempReg(); #endif // TARGET_ARM64 - regNumber addrReg = REG_NA; - GenTreeLclVarCommon* varNode = nullptr; - GenTree* addrNode = nullptr; + GenTreeLclVarCommon* srcLclNode = nullptr; + regNumber addrReg = REG_NA; + ClassLayout* layout = nullptr; - if (source->OperGet() == GT_LCL_VAR) + // Setup "layout", "srcLclNode" and "addrReg". + if (source->OperIsLocalRead()) { - varNode = source->AsLclVarCommon(); + srcLclNode = source->AsLclVarCommon(); + layout = srcLclNode->GetLayout(compiler); + LclVarDsc* varDsc = compiler->lvaGetDesc(srcLclNode); + + // This struct must live on the stack frame. 
+ assert(varDsc->lvOnFrame && !varDsc->lvRegister); } else // we must have a GT_OBJ { - assert(source->OperGet() == GT_OBJ); - - addrNode = source->AsOp()->gtOp1; + layout = source->AsObj()->GetLayout(); + addrReg = genConsumeReg(source->AsObj()->Addr()); - // addrNode can either be a GT_LCL_VAR_ADDR or an address expression - // - if (addrNode->OperGet() == GT_LCL_VAR_ADDR) +#ifdef TARGET_ARM64 + // If addrReg is equal to loReg, swap(loReg, hiReg) + // This reduces code complexity by only supporting one addrReg overwrite case + if (loReg == addrReg) { - // We have a GT_OBJ(GT_LCL_VAR_ADDR) - // - // We will treat this case the same as above - // (i.e if we just had this GT_LCL_VAR directly as the source) - // so update 'source' to point this GT_LCL_VAR_ADDR node - // and continue to the codegen for the LCL_VAR node below - // - assert(addrNode->isContained()); - varNode = addrNode->AsLclVarCommon(); - addrNode = nullptr; + loReg = hiReg; + hiReg = addrReg; } - else // addrNode is used - { - // TODO-Cleanup: `Lowering::NewPutArg` marks only `LCL_VAR_ADDR` as contained nowadays, - // but we use `genConsumeAddress` as a precaution, use `genConsumeReg()` instead. 
- assert(!addrNode->isContained()); - // Generate code to load the address that we need into a register - genConsumeAddress(addrNode); - addrReg = addrNode->GetRegNum(); - -#ifdef TARGET_ARM64 - // If addrReg equal to loReg, swap(loReg, hiReg) - // This reduces code complexity by only supporting one addrReg overwrite case - if (loReg == addrReg) - { - loReg = hiReg; - hiReg = addrReg; - } #endif // TARGET_ARM64 - } - } - - // Either varNode or addrNOde must have been setup above, - // the xor ensures that only one of the two is setup, not both - assert((varNode != nullptr) ^ (addrNode != nullptr)); - - ClassLayout* layout; - unsigned srcSize; - bool isHfa; - - // Setup the srcSize, isHFa, and gcPtrCount - if (source->OperGet() == GT_LCL_VAR) - { - assert(varNode != nullptr); - LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); - - // This struct also must live in the stack frame - // And it can't live in a register (SIMD) - assert(varDsc->lvType == TYP_STRUCT); - assert(varDsc->lvOnFrame && !varDsc->lvRegister); - - srcSize = varDsc->lvSize(); - isHfa = varDsc->lvIsHfa(); - layout = varDsc->GetLayout(); } - else // we must have a GT_OBJ - { - assert(source->OperGet() == GT_OBJ); - // If the source is an OBJ node then we need to use the type information - // it provides (size and GC layout) even if the node wraps a lclvar. Due - // to struct reinterpretation (e.g. Unsafe.As) it is possible that - // the OBJ node has a different type than the lclvar. 
- layout = source->AsObj()->GetLayout(); - srcSize = layout->GetSize(); - isHfa = compiler->IsHfa(layout->GetClassHandle()); - } + unsigned srcSize = layout->GetSize(); // If we have an HFA we can't have any GC pointers, // if not then the max size for the struct is 16 bytes - if (isHfa) + if (compiler->IsHfa(layout->GetClassHandle())) { noway_assert(!layout->HasGCPtr()); } @@ -981,45 +925,32 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) noway_assert(srcSize <= MAX_PASS_MULTIREG_BYTES); #endif // TARGET_ARM64 - unsigned structSize; - unsigned dstSize = treeNode->GetStackByteSize(); - if (dstSize != srcSize) + + // We can generate smaller code if store size is a multiple of TARGET_POINTER_SIZE. + // The dst size can be rounded up to PUTARG_STK size. The src size can be rounded up + // if it reads a local variable because reading "too much" from a local cannot fault. + // We must also be careful to check for the arm64 apple case where arguments can be + // passed without padding. + // + if ((dstSize != srcSize) && (srcLclNode != nullptr)) { - // We can generate a smaller code if store size is a multiple of TARGET_POINTER_SIZE. - // The dst size can be rounded up to PUTARG_STK size. - // The src size can be rounded up if it reads a local variable slot because the local - // variable stack allocation size is rounded up to be a multiple of the TARGET_POINTER_SIZE. - // The exception is arm64 apple arguments because they can be passed without padding. - if (varNode != nullptr) + unsigned widenedSrcSize = roundUp(srcSize, TARGET_POINTER_SIZE); + if (widenedSrcSize <= dstSize) { - // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size. 
- const LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); - const unsigned varStackSize = varDsc->lvSize(); - if (varStackSize >= srcSize) - { - srcSize = varStackSize; - } + srcSize = widenedSrcSize; } } - if (dstSize == srcSize) - { - structSize = dstSize; - } - else - { - // With Unsafe object cast we can have different strange combinations: - // PutArgStk<8>(Obj<16>(LclVar<8>)) -> copy 8 bytes; - // PutArgStk<16>(Obj<16>(LclVar<8>)) -> copy 16 bytes, reading undefined memory after the local. - structSize = min(dstSize, srcSize); - } - int remainingSize = structSize; + assert(srcSize <= dstSize); + + int remainingSize = srcSize; unsigned structOffset = 0; + unsigned lclOffset = (srcLclNode != nullptr) ? srcLclNode->GetLclOffs() : 0; unsigned nextIndex = 0; #ifdef TARGET_ARM64 - // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop + // For sizes >= 16 bytes we will generate a ldp and stp instruction each loop // ldp x2, x3, [x0] // stp x2, x3, [sp, #16] @@ -1028,11 +959,11 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) var_types type0 = layout->GetGCPtrType(nextIndex + 0); var_types type1 = layout->GetGCPtrType(nextIndex + 1); - if (varNode != nullptr) + if (srcLclNode != nullptr) { - // Load from our varNumImp source + // Load from our local source emit->emitIns_R_R_S_S(INS_ldp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, - varNode->GetLclNum(), structOffset); + srcLclNode->GetLclNum(), lclOffset + structOffset); } else { @@ -1056,17 +987,18 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) nextIndex += 2; } #else // TARGET_ARM - // For a >= 4 byte structSize we will generate a ldr and str instruction each loop + // For sizes >= 4 bytes we will generate a ldr and str instruction each loop // ldr r2, [r0] // str r2, [sp, #16] while (remainingSize >= TARGET_POINTER_SIZE) { var_types type = layout->GetGCPtrType(nextIndex); - if (varNode != nullptr) + if (srcLclNode != nullptr) { - // Load from our 
varNumImp source - emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, varNode->GetLclNum(), structOffset); + // Load from our local source + emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, srcLclNode->GetLclNum(), + lclOffset + structOffset); } else { @@ -1088,7 +1020,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) } #endif // TARGET_ARM - // For a 12-byte structSize we will generate two load instructions + // For a 12-byte size we will generate two load instructions // ldr x2, [x0] // ldr w3, [x0, #8] // str x2, [sp, #16] @@ -1129,10 +1061,10 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) remainingSize -= moveSize; instruction loadIns = ins_Load(type); - if (varNode != nullptr) + if (srcLclNode != nullptr) { - // Load from our varNumImp source - emit->emitIns_R_S(loadIns, attr, loReg, varNode->GetLclNum(), structOffset); + // Load from our local source + emit->emitIns_R_S(loadIns, attr, loReg, srcLclNode->GetLclNum(), lclOffset + structOffset); } else { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 49dde8acd1fd4..969c60717c2ea 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1031,9 +1031,6 @@ bool Lowering::TryLowerSwitchToBitTest( #endif // TARGET_XARCH } -// NOTE: this method deliberately does not update the call arg table. It must only -// be used by NewPutArg and LowerArg; these functions are responsible for updating -// the call arg table as necessary. void Lowering::ReplaceArgWithPutArgOrBitcast(GenTree** argSlot, GenTree* putArgOrBitcast) { assert(argSlot != nullptr); @@ -1069,12 +1066,7 @@ void Lowering::ReplaceArgWithPutArgOrBitcast(GenTree** argSlot, GenTree* putArgO // Notes: // For System V systems with native struct passing (i.e. UNIX_AMD64_ABI defined) // this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs -// for two eightbyte structs. -// -// For STK passed structs the method generates GT_PUTARG_STK tree. 
For System V systems with native struct passing -// (i.e. UNIX_AMD64_ABI defined) this method also sets the GC pointers count and the pointers -// layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value. -// (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.) +// for two eightbyte structs. For STK passed structs the method generates GT_PUTARG_STK tree. // GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, var_types type) { @@ -1086,19 +1078,6 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, bool isOnStack = (callArg->AbiInfo.GetRegNum() == REG_STK); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) - // Mark contained when we pass struct - // GT_FIELD_LIST is always marked contained when it is generated - if (type == TYP_STRUCT) - { - arg->SetContained(); - if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR)) - { - MakeSrcContained(arg, arg->AsObj()->Addr()); - } - } -#endif - #if FEATURE_ARG_SPLIT // Struct can be split into register(s) and stack on ARM if (compFeatureArgSplit() && callArg->AbiInfo.IsSplit()) @@ -1120,10 +1099,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, callArg->AbiInfo.GetStackByteSize(), #endif callArg->AbiInfo.NumRegs, call, putInIncomingArgArea); - // If struct argument is morphed to GT_FIELD_LIST node(s), - // we can know GC info by type of each GT_FIELD_LIST node. - // So we skip setting GC Pointer info. 
- // + GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit(); for (unsigned regIndex = 0; regIndex < callArg->AbiInfo.NumRegs; regIndex++) { @@ -1132,6 +1108,12 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, if (arg->OperGet() == GT_OBJ) { + arg->SetContained(); + if (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR) + { + MakeSrcContained(arg, arg->AsObj()->Addr()); + } + ClassLayout* layout = arg->AsObj()->GetLayout(); // Set type of registers @@ -1206,8 +1188,6 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, #ifdef DEBUG // Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce // a result. So the type of its operand must be the correct type to push on the stack. - // For a FIELD_LIST, this will be the type of the field (not the type of the arg), - // but otherwise it is generally the type of the operand. callArg->CheckIsStruct(); #endif @@ -1459,6 +1439,13 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late) ReplaceArgWithPutArgOrBitcast(ppArg, putArg); } } + + arg = *ppArg; + + if (arg->OperIs(GT_PUTARG_STK)) + { + LowerPutArgStk(arg->AsPutArgStk()); + } } #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 5b50f178e7c6c..177034fabe709 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -574,6 +574,44 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT addr->SetContained(); } +//------------------------------------------------------------------------ +// LowerPutArgStk: Lower a GT_PUTARG_STK. +// +// Arguments: +// putArgStk - The node to lower +// +void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) +{ + GenTree* src = putArgStk->Data(); + + if (src->TypeIs(TYP_STRUCT)) + { + // STRUCT args (FIELD_LIST / OBJ / LCL_VAR / LCL_FLD) will always be contained. 
+ MakeSrcContained(putArgStk, src); + + // TODO-ADDR: always perform this transformation in local morph and delete this code. + if (src->OperIs(GT_OBJ) && src->AsObj()->Addr()->OperIsLocalAddr()) + { + GenTreeLclVarCommon* lclAddrNode = src->AsObj()->Addr()->AsLclVarCommon(); + unsigned lclNum = lclAddrNode->GetLclNum(); + unsigned lclOffs = lclAddrNode->GetLclOffs(); + ClassLayout* layout = src->AsObj()->GetLayout(); + + src->ChangeOper(GT_LCL_FLD); + src->AsLclFld()->SetLclNum(lclNum); + src->AsLclFld()->SetLclOffs(lclOffs); + src->AsLclFld()->SetLayout(layout); + + BlockRange().Remove(lclAddrNode); + } + else if (src->OperIs(GT_LCL_VAR)) + { + // TODO-1stClassStructs: support struct enregistration here by retyping "src" to its register type. + comp->lvaSetVarDoNotEnregister(src->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::IsStructArg)); + } + } +} + //------------------------------------------------------------------------ // LowerCast: Lower GT_CAST(srcType, DstType) nodes. // diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 7a2cde0c4f5fb..1de03b57e8e92 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -416,6 +416,30 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT addr->SetContained(); } +//------------------------------------------------------------------------ +// LowerPutArgStk: Lower a GT_PUTARG_STK. +// +// Arguments: +// putArgStk - The node to lower +// +void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) +{ + GenTree* src = putArgStk->Data(); + + if (src->TypeIs(TYP_STRUCT)) + { + // STRUCT args (FIELD_LIST / OBJ) will always be contained. + MakeSrcContained(putArgStk, src); + + // Additionally, codegen supports containment of local addresses under OBJs. + if (src->OperIs(GT_OBJ) && src->AsObj()->Addr()->OperIs(GT_LCL_VAR_ADDR)) + { + // TODO-LOONGARCH64-CQ: support containment of LCL_FLD_ADDR too. 
+ MakeSrcContained(src, src->AsObj()->Addr()); + } + } +} + //------------------------------------------------------------------------ // LowerCast: Lower GT_CAST(srcType, DstType) nodes. // diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 46990d9f6dbc6..6d70aba0b9b8c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -455,10 +455,7 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT // LowerPutArgStk: Lower a GT_PUTARG_STK. // // Arguments: -// tree - The node of interest -// -// Return Value: -// None. +// putArgStk - The node of interest // void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) { @@ -4613,22 +4610,6 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) MakeSrcContained(call, ctrlExpr); } } - - for (CallArg& arg : call->gtArgs.EarlyArgs()) - { - if (arg.GetEarlyNode()->OperIs(GT_PUTARG_STK)) - { - LowerPutArgStk(arg.GetEarlyNode()->AsPutArgStk()); - } - } - - for (CallArg& arg : call->gtArgs.LateArgs()) - { - if (arg.GetLateNode()->OperIs(GT_PUTARG_STK)) - { - LowerPutArgStk(arg.GetLateNode()->AsPutArgStk()); - } - } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 4a9eefef350c5..1ae93e9643548 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -403,20 +403,19 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) { assert(argNode->gtOper == GT_PUTARG_STK); - GenTree* putArgChild = argNode->gtGetOp1(); - - int srcCount = 0; + GenTree* src = argNode->Data(); + int srcCount = 0; - // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct - if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST)) + // Do we have a TYP_STRUCT argument, if so it must be a multireg pass-by-value struct + if (src->TypeIs(TYP_STRUCT)) { // We 
will use store instructions that each write a register sized value - if (putArgChild->OperIs(GT_FIELD_LIST)) + if (src->OperIs(GT_FIELD_LIST)) { - assert(putArgChild->isContained()); + assert(src->isContained()); // We consume all of the items in the GT_FIELD_LIST - for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + for (GenTreeFieldList::Use& use : src->AsFieldList()->Uses()) { BuildUse(use.GetNode()); srcCount++; @@ -443,36 +442,25 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) buildInternalIntRegisterDefForNode(argNode); #endif // TARGET_ARM64 - if (putArgChild->OperGet() == GT_OBJ) + assert(src->isContained()); + + if (src->OperIs(GT_OBJ)) { - assert(putArgChild->isContained()); - GenTree* objChild = putArgChild->gtGetOp1(); - if (objChild->OperGet() == GT_LCL_VAR_ADDR) - { - // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR - // as one contained operation, and there are no source registers. - // - assert(objChild->isContained()); - } - else - { - // We will generate all of the code for the GT_PUTARG_STK and its child node - // as one contained operation - // - srcCount = BuildOperandUses(objChild); - } + // Build uses for the address to load from. + // + srcCount = BuildOperandUses(src->AsObj()->Addr()); } else { // No source registers. - putArgChild->OperIs(GT_LCL_VAR); + assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD)); } } } else { - assert(!putArgChild->isContained()); - srcCount = BuildOperandUses(putArgChild); + assert(!src->isContained()); + srcCount = BuildOperandUses(src); #if defined(FEATURE_SIMD) if (compMacOsArm64Abi() && argNode->GetStackByteSize() == 12) {