Bug 1835034 - Part 8: Inline DataView.prototype.getFloat16. r=jandem

Extend the existing DataView code to also support Float16, using changes similar to those in the previous part.

Depends on D215768.

Differential Revision: https://phabricator.services.mozilla.com/D215769
i3roly · Jul 13, 2024 · 33d0c3a · 33d0c3a
1 parent fd156b6
commit 33d0c3a
Show file tree

Hide file tree

Showing 18 changed files with 150 additions and 37 deletions.
diff --git a/js/src/builtin/DataViewObject.cpp b/js/src/builtin/DataViewObject.cpp
@@ -1145,8 +1145,8 @@ const JSFunctionSpec DataViewObject::methods[] = {
                     DataViewGetInt32),
     JS_INLINABLE_FN("getUint32", DataViewObject::fun_getUint32, 1, 0,
                     DataViewGetUint32),
-    // TODO: See Bug 1835034 for JIT support for Float16Array
-    JS_FN("getFloat16", DataViewObject::fun_getFloat16, 1, 0),
+    JS_INLINABLE_FN("getFloat16", DataViewObject::fun_getFloat16, 1, 0,
+                    DataViewGetFloat16),
     JS_INLINABLE_FN("getFloat32", DataViewObject::fun_getFloat32, 1, 0,
                     DataViewGetFloat32),
     JS_INLINABLE_FN("getFloat64", DataViewObject::fun_getFloat64, 1, 0,

diff --git a/js/src/jit/CacheIR.cpp b/js/src/jit/CacheIR.cpp
@@ -11711,6 +11711,8 @@ AttachDecision InlinableNativeIRGenerator::tryAttachStub() {
       return tryAttachDataViewGet(Scalar::Int32);
     case InlinableNative::DataViewGetUint32:
       return tryAttachDataViewGet(Scalar::Uint32);
+    case InlinableNative::DataViewGetFloat16:
+      return tryAttachDataViewGet(Scalar::Float16);
     case InlinableNative::DataViewGetFloat32:
       return tryAttachDataViewGet(Scalar::Float32);
     case InlinableNative::DataViewGetFloat64:

diff --git a/js/src/jit/CacheIRCompiler.cpp b/js/src/jit/CacheIRCompiler.cpp
@@ -6898,16 +6898,18 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
   Register64 outputReg64 = output.valueReg().toRegister64();
   Register outputScratch = outputReg64.scratchReg();
 
-  Register boundsCheckScratch;
+  Register scratch2;
 #ifndef JS_CODEGEN_X86
-  Maybe<AutoScratchRegister> maybeBoundsCheckScratch;
-  if (viewKind == ArrayBufferViewKind::Resizable) {
-    maybeBoundsCheckScratch.emplace(allocator, masm);
-    boundsCheckScratch = *maybeBoundsCheckScratch;
+  Maybe<AutoScratchRegister> maybeScratch2;
+  if (viewKind == ArrayBufferViewKind::Resizable ||
+      (elementType == Scalar::Float16 &&
+       !MacroAssembler::SupportsFloat32To16())) {
+    maybeScratch2.emplace(allocator, masm);
+    scratch2 = *maybeScratch2;
   }
 #else
   // Not enough registers on x86, so use the other part of outputReg64.
-  boundsCheckScratch = outputReg64.secondScratchReg();
+  scratch2 = outputReg64.secondScratchReg();
 #endif
 
   FailurePath* failure;
@@ -6918,7 +6920,7 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
   const size_t byteSize = Scalar::byteSize(elementType);
 
   emitDataViewBoundsCheck(viewKind, byteSize, obj, offset, outputScratch,
-                          boundsCheckScratch, failure->label());
+                          scratch2, failure->label());
 
   masm.loadPtr(Address(obj, DataViewObject::dataOffset()), outputScratch);
 
@@ -6935,6 +6937,7 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
       masm.load16UnalignedSignExtend(source, outputScratch);
       break;
     case Scalar::Uint16:
+    case Scalar::Float16:
       masm.load16UnalignedZeroExtend(source, outputScratch);
       break;
     case Scalar::Int32:
@@ -6963,6 +6966,7 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
         masm.byteSwap16SignExtend(outputScratch);
         break;
       case Scalar::Uint16:
+      case Scalar::Float16:
         masm.byteSwap16ZeroExtend(outputScratch);
         break;
       case Scalar::Int32:
@@ -7002,6 +7006,15 @@ bool CacheIRCompiler::emitLoadDataViewValueResult(
                      failure->label());
       break;
     }
+    case Scalar::Float16: {
+      FloatRegister scratchFloat32 = floatScratch0.get().asSingle();
+      masm.moveGPRToFloat16(outputScratch, scratchFloat32, scratch2,
+                            liveVolatileRegs());
+      masm.canonicalizeFloat(scratchFloat32);
+      masm.convertFloat32ToDouble(scratchFloat32, floatScratch0);
+      masm.boxDouble(floatScratch0, output.valueReg(), floatScratch0);
+      break;
+    }
     case Scalar::Float32: {
       FloatRegister scratchFloat32 = floatScratch0.get().asSingle();
       masm.moveGPRToFloat32(outputScratch, scratchFloat32);

diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp
@@ -18137,13 +18137,19 @@ void CodeGenerator::visitLoadUnboxedBigInt(LLoadUnboxedBigInt* lir) {
 void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
   Register elements = ToRegister(lir->elements());
   const LAllocation* littleEndian = lir->littleEndian();
-  Register temp = ToTempRegisterOrInvalid(lir->temp());
+  Register temp1 = ToTempRegisterOrInvalid(lir->temp1());
+  Register temp2 = ToTempRegisterOrInvalid(lir->temp2());
   Register64 temp64 = ToTempRegister64OrInvalid(lir->temp64());
   AnyRegister out = ToAnyRegister(lir->output());
 
   const MLoadDataViewElement* mir = lir->mir();
   Scalar::Type storageType = mir->storageType();
 
+  LiveRegisterSet volatileRegs;
+  if (MacroAssembler::LoadRequiresCall(storageType)) {
+    volatileRegs = liveVolatileRegs(lir);
+  }
+
   BaseIndex source(elements, ToRegister(lir->index()), TimesOne);
 
   bool noSwap = littleEndian->isConstant() &&
@@ -18154,19 +18160,17 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
   if (noSwap && (!Scalar::isFloatingType(storageType) ||
                  MacroAssembler::SupportsFastUnalignedFPAccesses())) {
     if (!Scalar::isBigIntType(storageType)) {
-      MOZ_ASSERT(storageType != Scalar::Float16);
-
       Label fail;
-      masm.loadFromTypedArray(storageType, source, out, temp, InvalidReg, &fail,
-                              LiveRegisterSet{});
+      masm.loadFromTypedArray(storageType, source, out, temp1, temp2, &fail,
+                              volatileRegs);
 
       if (fail.used()) {
         bailoutFrom(&fail, lir->snapshot());
       }
     } else {
       masm.load64(source, temp64);
 
-      emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp);
+      emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp1);
     }
     return;
   }
@@ -18183,10 +18187,13 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
       masm.load32Unaligned(source, out.gpr());
       break;
     case Scalar::Uint32:
-      masm.load32Unaligned(source, out.isFloat() ? temp : out.gpr());
+      masm.load32Unaligned(source, out.isFloat() ? temp1 : out.gpr());
+      break;
+    case Scalar::Float16:
+      masm.load16UnalignedZeroExtend(source, temp1);
       break;
     case Scalar::Float32:
-      masm.load32Unaligned(source, temp);
+      masm.load32Unaligned(source, temp1);
       break;
     case Scalar::Float64:
     case Scalar::BigInt64:
@@ -18220,10 +18227,13 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
         masm.byteSwap32(out.gpr());
         break;
       case Scalar::Uint32:
-        masm.byteSwap32(out.isFloat() ? temp : out.gpr());
+        masm.byteSwap32(out.isFloat() ? temp1 : out.gpr());
+        break;
+      case Scalar::Float16:
+        masm.byteSwap16ZeroExtend(temp1);
         break;
       case Scalar::Float32:
-        masm.byteSwap32(temp);
+        masm.byteSwap32(temp1);
         break;
       case Scalar::Float64:
       case Scalar::BigInt64:
@@ -18250,16 +18260,20 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
       break;
     case Scalar::Uint32:
       if (out.isFloat()) {
-        masm.convertUInt32ToDouble(temp, out.fpu());
+        masm.convertUInt32ToDouble(temp1, out.fpu());
       } else {
         // Bail out if the value doesn't fit into a signed int32 value. This
         // is what allows MLoadDataViewElement to have a type() of
         // MIRType::Int32 for UInt32 array loads.
         bailoutTest32(Assembler::Signed, out.gpr(), out.gpr(), lir->snapshot());
       }
       break;
+    case Scalar::Float16:
+      masm.moveGPRToFloat16(temp1, out.fpu(), temp2, volatileRegs);
+      masm.canonicalizeFloat(out.fpu());
+      break;
     case Scalar::Float32:
-      masm.moveGPRToFloat32(temp, out.fpu());
+      masm.moveGPRToFloat32(temp1, out.fpu());
       masm.canonicalizeFloat(out.fpu());
       break;
     case Scalar::Float64:
@@ -18268,7 +18282,7 @@ void CodeGenerator::visitLoadDataViewElement(LLoadDataViewElement* lir) {
       break;
     case Scalar::BigInt64:
     case Scalar::BigUint64:
-      emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp);
+      emitCreateBigInt(lir, storageType, temp64, out.gpr(), temp1);
       break;
     case Scalar::Int8:
     case Scalar::Uint8:

diff --git a/js/src/jit/InlinableNatives.cpp b/js/src/jit/InlinableNatives.cpp
@@ -264,6 +264,7 @@ bool js::jit::CanInlineNativeCrossRealm(InlinableNative native) {
     case InlinableNative::DataViewGetUint16:
     case InlinableNative::DataViewGetInt32:
     case InlinableNative::DataViewGetUint32:
+    case InlinableNative::DataViewGetFloat16:
     case InlinableNative::DataViewGetFloat32:
     case InlinableNative::DataViewGetFloat64:
     case InlinableNative::DataViewGetBigInt64:

diff --git a/js/src/jit/InlinableNatives.h b/js/src/jit/InlinableNatives.h
@@ -46,6 +46,7 @@
   _(DataViewGetUint16)                             \
   _(DataViewGetInt32)                              \
   _(DataViewGetUint32)                             \
+  _(DataViewGetFloat16)                            \
   _(DataViewGetFloat32)                            \
   _(DataViewGetFloat64)                            \
   _(DataViewGetBigInt64)                           \

diff --git a/js/src/jit/Lowering.cpp b/js/src/jit/Lowering.cpp
@@ -4589,40 +4589,49 @@ void LIRGenerator::visitLoadDataViewElement(MLoadDataViewElement* ins) {
 
   // We need a temp register for:
   // - Uint32Array with known double result,
+  // - Float16Array,
   // - Float32Array,
   // - and BigInt64Array and BigUint64Array.
-  LDefinition tempDef = LDefinition::BogusTemp();
+  LDefinition temp1 = LDefinition::BogusTemp();
   if ((ins->storageType() == Scalar::Uint32 &&
        IsFloatingPointType(ins->type())) ||
+      ins->storageType() == Scalar::Float16 ||
       ins->storageType() == Scalar::Float32) {
-    tempDef = temp();
+    temp1 = temp();
   }
   if (Scalar::isBigIntType(ins->storageType())) {
 #ifdef JS_CODEGEN_X86
     // There are not enough registers on x86.
     if (littleEndian.isConstant()) {
-      tempDef = temp();
+      temp1 = temp();
     }
 #else
-    tempDef = temp();
+    temp1 = temp();
 #endif
   }
 
+  // Additional temp when the Float16 to Float32 conversion requires a call.
+  LDefinition temp2 = LDefinition::BogusTemp();
+  if (MacroAssembler::LoadRequiresCall(ins->storageType())) {
+    temp2 = temp();
+  }
+
   // We also need a separate 64-bit temp register for:
   // - Float64Array
   // - and BigInt64Array and BigUint64Array.
-  LInt64Definition temp64Def = LInt64Definition::BogusTemp();
+  LInt64Definition temp64 = LInt64Definition::BogusTemp();
   if (Scalar::byteSize(ins->storageType()) == 8) {
-    temp64Def = tempInt64();
+    temp64 = tempInt64();
   }
 
   auto* lir = new (alloc())
-      LLoadDataViewElement(elements, index, littleEndian, tempDef, temp64Def);
+      LLoadDataViewElement(elements, index, littleEndian, temp1, temp2, temp64);
   if (ins->fallible()) {
     assignSnapshot(lir, ins->bailoutKind());
   }
   define(lir, ins);
-  if (Scalar::isBigIntType(ins->storageType())) {
+  if (Scalar::isBigIntType(ins->storageType()) ||
+      MacroAssembler::LoadRequiresCall(ins->storageType())) {
     assignSafepoint(lir, ins);
   }
 }

diff --git a/js/src/jit/MIR.h b/js/src/jit/MIR.h
@@ -6732,7 +6732,7 @@ class MLoadDataViewElement : public MTernaryInstruction,
   void computeRange(TempAllocator& alloc) override;
 
   bool canProduceFloat32() const override {
-    return storageType_ == Scalar::Float32;
+    return storageType_ == Scalar::Float32 || storageType_ == Scalar::Float16;
   }
 
   ALLOW_CLONE(MLoadDataViewElement)

diff --git a/js/src/jit/MacroAssembler.cpp b/js/src/jit/MacroAssembler.cpp
@@ -7702,6 +7702,33 @@ template void MacroAssembler::loadFloat16(const BaseIndex& src,
                                           Register temp2,
                                           LiveRegisterSet volatileLiveRegs);
 
+void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest,
+                                      Register temp,
+                                      LiveRegisterSet volatileLiveRegs) {
+  if (MacroAssembler::SupportsFloat32To16()) {
+    moveGPRToFloat16(src, dest);
+
+    // Float16 is currently passed as Float32, so expand again to Float32.
+    convertFloat16ToFloat32(dest, dest);
+    return;
+  }
+
+  LiveRegisterSet save = volatileLiveRegs;
+  save.takeUnchecked(dest);
+  save.takeUnchecked(dest.asDouble());
+  save.takeUnchecked(temp);
+
+  PushRegsInMask(save);
+
+  using Fn = float (*)(int32_t);
+  setupUnalignedABICall(temp);
+  passABIArg(src);
+  callWithABI<Fn, jit::Float16ToFloat32>(ABIType::Float32);
+  storeCallFloatResult(dest);
+
+  PopRegsInMask(save);
+}
+
 void MacroAssembler::debugAssertIsObject(const ValueOperand& val) {
 #ifdef DEBUG
   Label ok;

diff --git a/js/src/jit/MacroAssembler.h b/js/src/jit/MacroAssembler.h
@@ -981,6 +981,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
   inline void move64(Imm64 imm, Register64 dest) PER_ARCH;
   inline void move64(Register64 src, Register64 dest) PER_ARCH;
 
+
+  // Clears the high words of `src`.
+  inline void moveGPRToFloat16(Register src,
+                               FloatRegister dest) PER_SHARED_ARCH;
+
   inline void moveFloat32ToGPR(FloatRegister src,
                                Register dest) PER_SHARED_ARCH;
   inline void moveGPRToFloat32(Register src,
@@ -5287,6 +5292,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
   void loadFloat16(const T& src, FloatRegister dest, Register temp1,
                    Register temp2, LiveRegisterSet volatileLiveRegs);
 
+  void moveGPRToFloat16(Register src, FloatRegister dest, Register temp,
+                        LiveRegisterSet volatileLiveRegs);
+
   void debugAssertIsObject(const ValueOperand& val);
   void debugAssertObjHasFixedSlots(Register obj, Register scratch);
 

diff --git a/js/src/jit/arm/MacroAssembler-arm-inl.h b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -24,6 +24,10 @@ void MacroAssembler::move64(Imm64 imm, Register64 dest) {
   move32(Imm32((imm.value >> 32) & 0xFFFFFFFFL), dest.high);
 }
 
+void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
+  MOZ_CRASH("Not supported for this target");
+}
+
 void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
   ma_vxfer(src, dest);
 }

diff --git a/js/src/jit/arm64/MacroAssembler-arm64-inl.h b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
@@ -22,6 +22,15 @@ void MacroAssembler::move64(Imm64 imm, Register64 dest) {
   Mov(ARMRegister(dest.reg, 64), imm.value);
 }
 
+void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
+  // Ensure the hi-word is zeroed.
+  Uxth(ARMRegister(src, 32), ARMRegister(src, 32));
+
+  // A direct "32-bit to half-precision" move requires FEAT_FP16, so we
+  // instead use a "32-bit to single-precision" move.
+  Fmov(ARMFPRegister(dest, 32), ARMRegister(src, 32));
+}
+
 void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
   Fmov(ARMRegister(dest, 32), ARMFPRegister(src, 32));
 }

diff --git a/js/src/jit/loong64/MacroAssembler-loong64-inl.h b/js/src/jit/loong64/MacroAssembler-loong64-inl.h
@@ -60,6 +60,10 @@ void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
   moveToFloat32(src, dest);
 }
 
+void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
+  MOZ_CRASH("Not supported for this target");
+}
+
 void MacroAssembler::move8ZeroExtend(Register src, Register dest) {
   as_bstrpick_d(dest, src, 7, 0);
 }

diff --git a/js/src/jit/mips-shared/MacroAssembler-mips-shared-inl.h b/js/src/jit/mips-shared/MacroAssembler-mips-shared-inl.h
@@ -14,6 +14,10 @@ namespace jit {
 
 //{{{ check_macroassembler_style
 
+void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
+  MOZ_CRASH("Not supported for this target");
+}
+
 void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
   moveFromFloat32(src, dest);
 }

diff --git a/js/src/jit/riscv64/MacroAssembler-riscv64-inl.h b/js/src/jit/riscv64/MacroAssembler-riscv64-inl.h
@@ -1606,6 +1606,9 @@ void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
 void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
   fmv_w_x(dest, src);
 }
+void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
+  MOZ_CRASH("Not supported for this target");
+}
 void MacroAssembler::mul32(Register rhs, Register srcDest) {
   mulw(srcDest, srcDest, rhs);
 }