Skip to content

Commit

Permalink
Bug 1835034 - Part 8: Inline DataView.prototype.getFloat16. r=jandem
Browse files Browse the repository at this point in the history
Extend the existing DataView code to also support Float16, using changes
similar to those in the previous part.

Depends on D215768

Differential Revision: https://phabricator.services.mozilla.com/D215769
  • Loading branch information
anba committed Jul 13, 2024
1 parent fd156b6 commit 33d0c3a
Show file tree
Hide file tree
Showing 18 changed files with 150 additions and 37 deletions.
4 changes: 2 additions & 2 deletions js/src/builtin/DataViewObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1145,8 +1145,8 @@ const JSFunctionSpec DataViewObject::methods[] = {
DataViewGetInt32),
JS_INLINABLE_FN("getUint32", DataViewObject::fun_getUint32, 1, 0,
DataViewGetUint32),
// TODO: See Bug 1835034 for JIT support for Float16Array
JS_FN("getFloat16", DataViewObject::fun_getFloat16, 1, 0),
JS_INLINABLE_FN("getFloat16", DataViewObject::fun_getFloat16, 1, 0,
DataViewGetFloat16),
JS_INLINABLE_FN("getFloat32", DataViewObject::fun_getFloat32, 1, 0,
DataViewGetFloat32),
JS_INLINABLE_FN("getFloat64", DataViewObject::fun_getFloat64, 1, 0,
Expand Down
2 changes: 2 additions & 0 deletions js/src/jit/CacheIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11711,6 +11711,8 @@ AttachDecision InlinableNativeIRGenerator::tryAttachStub() {
return tryAttachDataViewGet(Scalar::Int32);
case InlinableNative::DataViewGetUint32:
return tryAttachDataViewGet(Scalar::Uint32);
case InlinableNative::DataViewGetFloat16:
return tryAttachDataViewGet(Scalar::Float16);
case InlinableNative::DataViewGetFloat32:
return tryAttachDataViewGet(Scalar::Float32);
case InlinableNative::DataViewGetFloat64:
Expand Down
27 changes: 20 additions & 7 deletions js/src/jit/CacheIRCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6898,16 +6898,18 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
Register64 outputReg64 = output.valueReg().toRegister64();
Register outputScratch = outputReg64.scratchReg();

Register boundsCheckScratch;
Register scratch2;
#ifndef JS_CODEGEN_X86
Maybe<AutoScratchRegister> maybeBoundsCheckScratch;
if (viewKind == ArrayBufferViewKind::Resizable) {
maybeBoundsCheckScratch.emplace(allocator, masm);
boundsCheckScratch = *maybeBoundsCheckScratch;
Maybe<AutoScratchRegister> maybeScratch2;
if (viewKind == ArrayBufferViewKind::Resizable ||
(elementType == Scalar::Float16 &&
!MacroAssembler::SupportsFloat32To16())) {
maybeScratch2.emplace(allocator, masm);
scratch2 = *maybeScratch2;
}
#else
// Not enough registers on x86, so use the other part of outputReg64.
boundsCheckScratch = outputReg64.secondScratchReg();
scratch2 = outputReg64.secondScratchReg();
#endif

FailurePath* failure;
Expand All @@ -6918,7 +6920,7 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
const size_t byteSize = Scalar::byteSize(elementType);

emitDataViewBoundsCheck(viewKind, byteSize, obj, offset, outputScratch,
boundsCheckScratch, failure->label());
scratch2, failure->label());

masm.loadPtr(Address(obj, DataViewObject::dataOffset()), outputScratch);

Expand All @@ -6935,6 +6937,7 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
masm.load16UnalignedSignExtend(source, outputScratch);
break;
case Scalar::Uint16:
case Scalar::Float16:
masm.load16UnalignedZeroExtend(source, outputScratch);
break;
case Scalar::Int32:
Expand Down Expand Up @@ -6963,6 +6966,7 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
masm.byteSwap16SignExtend(outputScratch);
break;
case Scalar::Uint16:
case Scalar::Float16:
masm.byteSwap16ZeroExtend(outputScratch);
break;
case Scalar::Int32:
Expand Down Expand Up @@ -7002,6 +7006,15 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
failure->label());
break;
}
case Scalar::Float16: {
FloatRegister scratchFloat32 = floatScratch0.get().asSingle();
masm.moveGPRToFloat16(outputScratch, scratchFloat32, scratch2,
liveVolatileRegs());
masm.canonicalizeFloat(scratchFloat32);
masm.convertFloat32ToDouble(scratchFloat32, floatScratch0);
masm.boxDouble(floatScratch0, output.valueReg(), floatScratch0);
break;
}
case Scalar::Float32: {
FloatRegister scratchFloat32 = floatScratch0.get().asSingle();
masm.moveGPRToFloat32(outputScratch, scratchFloat32);
Expand Down
40 changes: 27 additions & 13 deletions js/src/jit/CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18137,13 +18137,19 @@ void CodeGenerator::visitLoadUnboxedBigInt(LLoadUnboxedBigInt* lir) {
void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
Register elements = ToRegister(lir->elements());
const LAllocation* littleEndian = lir->littleEndian();
Register temp = ToTempRegisterOrInvalid(lir->temp());
Register temp1 = ToTempRegisterOrInvalid(lir->temp1());
Register temp2 = ToTempRegisterOrInvalid(lir->temp2());
Register64 temp64 = ToTempRegister64OrInvalid(lir->temp64());
AnyRegister out = ToAnyRegister(lir->output());

const MLoadDataViewElement* mir = lir->mir();
Scalar::Type storageType = mir->storageType();

LiveRegisterSet volatileRegs;
if (MacroAssembler::LoadRequiresCall(storageType)) {
volatileRegs = liveVolatileRegs(lir);
}

BaseIndex source(elements, ToRegister(lir->index()), TimesOne);

bool noSwap = littleEndian->isConstant() &&
Expand All @@ -18154,19 +18160,17 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
if (noSwap && (!Scalar::isFloatingType(storageType) ||
MacroAssembler::SupportsFastUnalignedFPAccesses())) {
if (!Scalar::isBigIntType(storageType)) {
MOZ_ASSERT(storageType != Scalar::Float16);

Label fail;
masm.loadFromTypedArray(storageType, source, out, temp, InvalidReg, &fail,
LiveRegisterSet{});
masm.loadFromTypedArray(storageType, source, out, temp1, temp2, &fail,
volatileRegs);

if (fail.used()) {
bailoutFrom(&fail, lir->snapshot());
}
} else {
masm.load64(source, temp64);

emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp);
emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp1);
}
return;
}
Expand All @@ -18183,10 +18187,13 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
masm.load32Unaligned(source, out.gpr());
break;
case Scalar::Uint32:
masm.load32Unaligned(source, out.isFloat() ? temp : out.gpr());
masm.load32Unaligned(source, out.isFloat() ? temp1 : out.gpr());
break;
case Scalar::Float16:
masm.load16UnalignedZeroExtend(source, temp1);
break;
case Scalar::Float32:
masm.load32Unaligned(source, temp);
masm.load32Unaligned(source, temp1);
break;
case Scalar::Float64:
case Scalar::BigInt64:
Expand Down Expand Up @@ -18220,10 +18227,13 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
masm.byteSwap32(out.gpr());
break;
case Scalar::Uint32:
masm.byteSwap32(out.isFloat() ? temp : out.gpr());
masm.byteSwap32(out.isFloat() ? temp1 : out.gpr());
break;
case Scalar::Float16:
masm.byteSwap16ZeroExtend(temp1);
break;
case Scalar::Float32:
masm.byteSwap32(temp);
masm.byteSwap32(temp1);
break;
case Scalar::Float64:
case Scalar::BigInt64:
Expand All @@ -18250,16 +18260,20 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
break;
case Scalar::Uint32:
if (out.isFloat()) {
masm.convertUInt32ToDouble(temp, out.fpu());
masm.convertUInt32ToDouble(temp1, out.fpu());
} else {
// Bail out if the value doesn't fit into a signed int32 value. This
// is what allows MLoadDataViewElement to have a type() of
// MIRType::Int32 for UInt32 array loads.
bailoutTest32(Assembler::Signed, out.gpr(), out.gpr(), lir->snapshot());
}
break;
case Scalar::Float16:
masm.moveGPRToFloat16(temp1, out.fpu(), temp2, volatileRegs);
masm.canonicalizeFloat(out.fpu());
break;
case Scalar::Float32:
masm.moveGPRToFloat32(temp, out.fpu());
masm.moveGPRToFloat32(temp1, out.fpu());
masm.canonicalizeFloat(out.fpu());
break;
case Scalar::Float64:
Expand All @@ -18268,7 +18282,7 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
break;
case Scalar::BigInt64:
case Scalar::BigUint64:
emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp);
emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp1);
break;
case Scalar::Int8:
case Scalar::Uint8:
Expand Down
1 change: 1 addition & 0 deletions js/src/jit/InlinableNatives.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ bool js::jit::CanInlineNativeCrossRealm(InlinableNative native) {
case InlinableNative::DataViewGetUint16:
case InlinableNative::DataViewGetInt32:
case InlinableNative::DataViewGetUint32:
case InlinableNative::DataViewGetFloat16:
case InlinableNative::DataViewGetFloat32:
case InlinableNative::DataViewGetFloat64:
case InlinableNative::DataViewGetBigInt64:
Expand Down
1 change: 1 addition & 0 deletions js/src/jit/InlinableNatives.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
_(DataViewGetUint16) \
_(DataViewGetInt32) \
_(DataViewGetUint32) \
_(DataViewGetFloat16) \
_(DataViewGetFloat32) \
_(DataViewGetFloat64) \
_(DataViewGetBigInt64) \
Expand Down
25 changes: 17 additions & 8 deletions js/src/jit/Lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4589,40 +4589,49 @@ void LIRGenerator::visitLoadDataViewElement(MLoadDataViewElement* ins) {

// We need a temp register for:
// - Uint32Array with known double result,
// - Float16Array,
// - Float32Array,
// - and BigInt64Array and BigUint64Array.
LDefinition tempDef = LDefinition::BogusTemp();
LDefinition temp1 = LDefinition::BogusTemp();
if ((ins->storageType() == Scalar::Uint32 &&
IsFloatingPointType(ins->type())) ||
ins->storageType() == Scalar::Float16 ||
ins->storageType() == Scalar::Float32) {
tempDef = temp();
temp1 = temp();
}
if (Scalar::isBigIntType(ins->storageType())) {
#ifdef JS_CODEGEN_X86
// There are not enough registers on x86.
if (littleEndian.isConstant()) {
tempDef = temp();
temp1 = temp();
}
#else
tempDef = temp();
temp1 = temp();
#endif
}

// Additional temp when converting the loaded Float16 value requires a call.
LDefinition temp2 = LDefinition::BogusTemp();
if (MacroAssembler::LoadRequiresCall(ins->storageType())) {
temp2 = temp();
}

// We also need a separate 64-bit temp register for:
// - Float64Array
// - and BigInt64Array and BigUint64Array.
LInt64Definition temp64Def = LInt64Definition::BogusTemp();
LInt64Definition temp64 = LInt64Definition::BogusTemp();
if (Scalar::byteSize(ins->storageType()) == 8) {
temp64Def = tempInt64();
temp64 = tempInt64();
}

auto* lir = new (alloc())
LLoadDataViewElement(elements, index, littleEndian, tempDef, temp64Def);
LLoadDataViewElement(elements, index, littleEndian, temp1, temp2, temp64);
if (ins->fallible()) {
assignSnapshot(lir, ins->bailoutKind());
}
define(lir, ins);
if (Scalar::isBigIntType(ins->storageType())) {
if (Scalar::isBigIntType(ins->storageType()) ||
MacroAssembler::LoadRequiresCall(ins->storageType())) {
assignSafepoint(lir, ins);
}
}
Expand Down
2 changes: 1 addition & 1 deletion js/src/jit/MIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -6732,7 +6732,7 @@ class MLoadDataViewElement : public MTernaryInstruction,
void computeRange(TempAllocator& alloc) override;

bool canProduceFloat32() const override {
return storageType_ == Scalar::Float32;
return storageType_ == Scalar::Float32 || storageType_ == Scalar::Float16;
}

ALLOW_CLONE(MLoadDataViewElement)
Expand Down
27 changes: 27 additions & 0 deletions js/src/jit/MacroAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7702,6 +7702,33 @@ template void MacroAssembler::loadFloat16(const BaseIndex& src,
Register temp2,
LiveRegisterSet volatileLiveRegs);

// Converts the Float16 value whose bits are held in |src| and leaves the
// result in |dest| as a Float32.
//
// |temp| is only used on the call-out path, as the scratch register for
// setting up the unaligned ABI call. |volatileLiveRegs| is the set of volatile
// registers that must survive that call.
void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest,
                                      Register temp,
                                      LiveRegisterSet volatileLiveRegs) {
  if (!MacroAssembler::SupportsFloat32To16()) {
    // No inline conversion available: call jit::Float16ToFloat32 through the
    // ABI. |dest| receives the result and |temp| is scratch, so neither is
    // preserved across the call.
    LiveRegisterSet preserved = volatileLiveRegs;
    preserved.takeUnchecked(dest);
    preserved.takeUnchecked(dest.asDouble());
    preserved.takeUnchecked(temp);

    PushRegsInMask(preserved);

    using ConvertFn = float (*)(int32_t);
    setupUnalignedABICall(temp);
    passABIArg(src);
    callWithABI<ConvertFn, jit::Float16ToFloat32>(ABIType::Float32);
    storeCallFloatResult(dest);

    PopRegsInMask(preserved);
    return;
  }

  // Inline path: transfer the raw bits into the float register first.
  moveGPRToFloat16(src, dest);

  // Float16 is currently passed as Float32, so expand again to Float32.
  convertFloat16ToFloat32(dest, dest);
}

void MacroAssembler::debugAssertIsObject(const ValueOperand& val) {
#ifdef DEBUG
Label ok;
Expand Down
8 changes: 8 additions & 0 deletions js/src/jit/MacroAssembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
inline void move64(Imm64 imm, Register64 dest) PER_ARCH;
inline void move64(Register64 src, Register64 dest) PER_ARCH;


// Clears the high words of `src`.
inline void moveGPRToFloat16(Register src,
FloatRegister dest) PER_SHARED_ARCH;

inline void moveFloat32ToGPR(FloatRegister src,
Register dest) PER_SHARED_ARCH;
inline void moveGPRToFloat32(Register src,
Expand Down Expand Up @@ -5287,6 +5292,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
void loadFloat16(const T& src, FloatRegister dest, Register temp1,
Register temp2, LiveRegisterSet volatileLiveRegs);

void moveGPRToFloat16(Register src, FloatRegister dest, Register temp,
LiveRegisterSet volatileLiveRegs);

void debugAssertIsObject(const ValueOperand& val);
void debugAssertObjHasFixedSlots(Register obj, Register scratch);

Expand Down
4 changes: 4 additions & 0 deletions js/src/jit/arm/MacroAssembler-arm-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ void MacroAssembler::move64(Imm64 imm, Register64 dest) {
move32(Imm32((imm.value >> 32) & 0xFFFFFFFFL), dest.high);
}

// The raw GPR-to-Float16 move is not implemented for ARM32; reaching this
// function hits MOZ_CRASH.
// NOTE(review): presumably unreachable, because the four-argument
// moveGPRToFloat16 overload only calls this form when
// MacroAssembler::SupportsFloat32To16() is true -- confirm that it is false
// on this target.
void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
MOZ_CRASH("Not supported for this target");
}

void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
ma_vxfer(src, dest);
}
Expand Down
9 changes: 9 additions & 0 deletions js/src/jit/arm64/MacroAssembler-arm64-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ void MacroAssembler::move64(Imm64 imm, Register64 dest) {
Mov(ARMRegister(dest.reg, 64), imm.value);
}

// Moves the 16-bit value held in |src| into |dest| as raw bits. Note that
// |src| itself is modified: its hi-word is zeroed before the move.
void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
  const ARMRegister src32(src, 32);
  const ARMFPRegister dest32(dest, 32);

  // Make sure the hi-word of the 32-bit source is clear.
  Uxth(src32, src32);

  // A direct "32-bit to half-precision" move requires FEAT_FP16, so use a
  // "32-bit to single-precision" move instead.
  Fmov(dest32, src32);
}

void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
Fmov(ARMRegister(dest, 32), ARMFPRegister(src, 32));
}
Expand Down
4 changes: 4 additions & 0 deletions js/src/jit/loong64/MacroAssembler-loong64-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
moveToFloat32(src, dest);
}

// The raw GPR-to-Float16 move is not implemented for LoongArch64; reaching
// this function hits MOZ_CRASH.
// NOTE(review): presumably unreachable, because the four-argument
// moveGPRToFloat16 overload only calls this form when
// MacroAssembler::SupportsFloat32To16() is true -- confirm that it is false
// on this target.
void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
MOZ_CRASH("Not supported for this target");
}

void MacroAssembler::move8ZeroExtend(Register src, Register dest) {
as_bstrpick_d(dest, src, 7, 0);
}
Expand Down
4 changes: 4 additions & 0 deletions js/src/jit/mips-shared/MacroAssembler-mips-shared-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ namespace jit {

//{{{ check_macroassembler_style

// The raw GPR-to-Float16 move is not implemented for the MIPS targets;
// reaching this function hits MOZ_CRASH.
// NOTE(review): presumably unreachable, because the four-argument
// moveGPRToFloat16 overload only calls this form when
// MacroAssembler::SupportsFloat32To16() is true -- confirm that it is false
// on these targets.
void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
MOZ_CRASH("Not supported for this target");
}

void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
moveFromFloat32(src, dest);
}
Expand Down
3 changes: 3 additions & 0 deletions js/src/jit/riscv64/MacroAssembler-riscv64-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,9 @@ void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
fmv_w_x(dest, src);
}
// The raw GPR-to-Float16 move is not implemented for RISC-V 64; reaching this
// function hits MOZ_CRASH.
// NOTE(review): presumably unreachable, because the four-argument
// moveGPRToFloat16 overload only calls this form when
// MacroAssembler::SupportsFloat32To16() is true -- confirm that it is false
// on this target.
void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
MOZ_CRASH("Not supported for this target");
}
void MacroAssembler::mul32(Register rhs, Register srcDest) {
mulw(srcDest, srcDest, rhs);
}
Expand Down
Loading

0 comments on commit 33d0c3a

Please sign in to comment.