Bug 1835034 - Part 9: Inline storing into Float16Array. r=jandem

Slightly larger changes when compared to the previous two parts, because `MacroAssembler::storeToTypedFloatArray` had to be changed to support conversions instead of performing conversion in its caller: - `CacheIRCompiler::emitStoreTypedArrayElement` used `ScratchFloat32Scope` to convert `double -> float32`, but using the same approach won't work for float16, because `ScratchFloat32Scope` is also needed in `MacroAssembler::storeFloat16` to convert `float32 -> float16`. - Therefore move the conversion `double -> float32` into `StoreToTypedFloatArray` - And the conversions `double -> float16` into `MacroAssembler::storeFloat16`. Codegen for `StoreUnboxedScalar` on x64 looks like: vcvtps2ph $0x4, %xmm0, %xmm15 vmovd %xmm15, %r11d movw %r11w, 0x0(%rdx,%rbx,2) And on ARM64: h31, s0 h31, [x2, x4, lsl #1] Depends on D215769 Differential Revision: https://phabricator.services.mozilla.com/D215770
i3roly · Jul 13, 2024 · e67263d · e67263d
1 parent 33d0c3a
commit e67263d
Show file tree

Hide file tree

Showing 23 changed files with 436 additions and 39 deletions.
diff --git a/js/src/jit-test/tests/warp/float16-as-float32-specialization-for-float16array-hole.js b/js/src/jit-test/tests/warp/float16-as-float32-specialization-for-float16array-hole.js
@@ -0,0 +1,87 @@
+function float32(f16, i, index) {
+  // Unbox to Int32.
+  i = i|0;
+
+  // Float32 addition.
+  let x = Math.fround(i + 0.1);
+
+  // Float32 square root.
+  let y = Math.fround(Math.sqrt(x));
+
+  // Store as Float16.
+  f16[index] = y;
+}
+
+function float64(f16, i, index) {
+  // Unbox to Int32.
+  i = i|0;
+
+  // Float32 addition.
+  let x = Math.fround(i + 0.1);
+
+  // Float64 square root.
+  let y = Math.sqrt(x);
+
+  // Store as Float16.
+  f16[index] = y;
+}
+
+function toBaseline(f) {
+  let source = f.toString();
+  assertEq(source.at(-1), "}");
+
+  // Add with-statement to disable Ion compilation.
+  source = source.slice(0, -1) + "; with ({}); }";
+
+  return Function(`return ${source};`)();
+}
+
+// Different results are expected for these inputs:
+//
+// Input    Float64-SQRT  Float32-SQRT
+// -----------------------------------
+// 1527     39.09375      39.0625
+// 16464    128.375       128.25
+// 18581    136.375       136.25
+// 20826    144.375       144.25
+// 23199    152.375       152.25
+// 25700    160.375       160.25
+// 28329    168.375       168.25
+// 31086    176.375       176.25
+//
+// Limit execution to 1550 to avoid spending too much time on this single test.
+//
+// 1550 iterations should still be enough to allow tiering up to Ion, at least
+// under eager compilation settings.
+const LIMIT = 1550;
+
+let float32_baseline = toBaseline(float32);
+let float64_baseline = toBaseline(float64);
+
+let f16 = new Float16Array(1);
+let u16 = new Uint16Array(f16.buffer);
+
+let n = 0;
+for (let i = 0; i < LIMIT; ++i) {
+  // Call with out-of-bounds indices to trigger compilation with
+  // MStoreTypedArrayElementHole.
+  float32(f16, i, 100_000);
+  float64(f16, i, 100_000);
+
+  float32(f16, i, 0);
+  let x = u16[0];
+
+  float32_baseline(f16, i, 0);
+  assertEq(x, u16[0]);
+
+  float64(f16, i, 0);
+  let y = u16[0];
+
+  float64_baseline(f16, i, 0);
+  assertEq(y, u16[0]);
+
+  if (x !== y) {
+    n++;
+  }
+}
+assertEq(n, 1);
diff --git a/js/src/jit-test/tests/warp/float16-as-float32-specialization-for-float16array.js b/js/src/jit-test/tests/warp/float16-as-float32-specialization-for-float16array.js
@@ -0,0 +1,82 @@
+function float32(f16, i) {
+  // Unbox to Int32.
+  i = i|0;
+
+  // Float32 addition.
+  let x = Math.fround(i + 0.1);
+
+  // Float32 square root.
+  let y = Math.fround(Math.sqrt(x));
+
+  // Store as Float16.
+  f16[0] = y;
+}
+
+function float64(f16, i) {
+  // Unbox to Int32.
+  i = i|0;
+
+  // Float32 addition.
+  let x = Math.fround(i + 0.1);
+
+  // Float64 square root.
+  let y = Math.sqrt(x);
+
+  // Store as Float16.
+  f16[0] = y;
+}
+
+function toBaseline(f) {
+  let source = f.toString();
+  assertEq(source.at(-1), "}");
+
+  // Add with-statement to disable Ion compilation.
+  source = source.slice(0, -1) + "; with ({}); }";
+
+  return Function(`return ${source};`)();
+}
+
+// Different results are expected for these inputs:
+//
+// Input    Float64-SQRT  Float32-SQRT
+// -----------------------------------
+// 1527     39.09375      39.0625
+// 16464    128.375       128.25
+// 18581    136.375       136.25
+// 20826    144.375       144.25
+// 23199    152.375       152.25
+// 25700    160.375       160.25
+// 28329    168.375       168.25
+// 31086    176.375       176.25
+//
+// Limit execution to 1550 to avoid spending too much time on this single test.
+//
+// 1550 iterations should still be enough to allow tiering up to Ion, at least
+// under eager compilation settings.
+const LIMIT = 1550;
+
+let float32_baseline = toBaseline(float32);
+let float64_baseline = toBaseline(float64);
+
+let f16 = new Float16Array(1);
+let u16 = new Uint16Array(f16.buffer);
+
+let n = 0;
+for (let i = 0; i < LIMIT; ++i) {
+  float32(f16, i);
+  let x = u16[0];
+
+  float32_baseline(f16, i);
+  assertEq(x, u16[0]);
+
+  float64(f16, i);
+  let y = u16[0];
+
+  float64_baseline(f16, i);
+  assertEq(y, u16[0]);
+
+  if (x !== y) {
+    n++;
+  }
+}
+assertEq(n, 1);
diff --git a/js/src/jit/ABIFunctionList-inl.h b/js/src/jit/ABIFunctionList-inl.h
@@ -148,6 +148,7 @@ namespace jit {
   _(js::jit::EqualStringsHelperPure)                                  \
   _(js::jit::FinishBailoutToBaseline)                                 \
   _(js::jit::Float16ToFloat32)                                        \
+  _(js::jit::Float32ToFloat16)                                        \
   _(js::jit::FrameIsDebuggeeCheck)                                    \
   _(js::jit::GetContextSensitiveInterpreterStub)                      \
   _(js::jit::GetIndexFromString)                                      \

diff --git a/js/src/jit/ABIFunctionType.yaml b/js/src/jit/ABIFunctionType.yaml
@@ -55,6 +55,10 @@
   ret: General
   args: [Float32]
 
+# int32_t f(float32)
+- ret: Int32
+  args: [Float32]
+
 # float f(float)
 - ret: Float32
   args: [Float32]

diff --git a/js/src/jit/CacheIR.cpp b/js/src/jit/CacheIR.cpp
@@ -5126,11 +5126,6 @@ AttachDecision SetPropIRGenerator::tryAttachSetTypedArrayElement(
   auto* tarr = &obj->as<TypedArrayObject>();
   Scalar::Type elementType = tarr->type();
 
-  if (elementType == Scalar::Float16) {
-    // TODO: See Bug 1835034 for JIT support for Float16Array.
-    return AttachDecision::NoAction;
-  }
-
   // Don't attach if the input type doesn't match the guard added below.
   if (!ValueCanConvertToNumeric(elementType, rhsVal_)) {
     return AttachDecision::NoAction;

diff --git a/js/src/jit/CacheIRCompiler.cpp b/js/src/jit/CacheIRCompiler.cpp
@@ -6611,7 +6611,7 @@ bool CacheIRCompiler::emitStoreTypedArrayElement(ObjOperandId objId,
   AutoScratchRegister scratch1(allocator, masm);
   Maybe<AutoScratchRegister> scratch2;
   Maybe<AutoSpectreBoundsScratchRegister> spectreScratch;
-  if (Scalar::isBigIntType(elementType) ||
+  if (Scalar::isBigIntType(elementType) || elementType == Scalar::Float16 ||
       viewKind == ArrayBufferViewKind::Resizable) {
     scratch2.emplace(allocator, masm);
   } else {
@@ -6651,12 +6651,10 @@ bool CacheIRCompiler::emitStoreTypedArrayElement(ObjOperandId objId,
 #ifndef JS_PUNBOX64
     masm.pop(obj);
 #endif
-  } else if (elementType == Scalar::Float32) {
-    ScratchFloat32Scope fpscratch(masm);
-    masm.convertDoubleToFloat32(floatScratch0, fpscratch);
-    masm.storeToTypedFloatArray(elementType, fpscratch, dest);
-  } else if (elementType == Scalar::Float64) {
-    masm.storeToTypedFloatArray(elementType, floatScratch0, dest);
+  } else if (Scalar::isFloatingType(elementType)) {
+    Register temp = scratch2 ? scratch2->get() : InvalidReg;
+    masm.storeToTypedFloatArray(elementType, floatScratch0, dest, temp,
+                                liveVolatileRegs());
   } else {
     masm.storeToTypedIntArray(elementType, *valInt32, dest);
   }

diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp
@@ -18487,9 +18487,12 @@ template void CodeGenerator::visitOutOfLineSwitch(
 template <typename T>
 static inline void StoreToTypedArray(MacroAssembler& masm,
                                      Scalar::Type writeType,
-                                     const LAllocation* value, const T& dest) {
-  if (writeType == Scalar::Float32 || writeType == Scalar::Float64) {
-    masm.storeToTypedFloatArray(writeType, ToFloatRegister(value), dest);
+                                     const LAllocation* value, const T& dest,
+                                     Register temp,
+                                     LiveRegisterSet volatileRegs) {
+  if (Scalar::isFloatingType(writeType)) {
+    masm.storeToTypedFloatArray(writeType, ToFloatRegister(value), dest, temp,
+                                volatileRegs);
   } else {
     if (value->isConstant()) {
       masm.storeToTypedIntArray(writeType, Imm32(ToInt32(value)), dest);
@@ -18501,19 +18504,25 @@ static inline void StoreToTypedArray(MacroAssembler& masm,
 
 void CodeGenerator::visitStoreUnboxedScalar(LStoreUnboxedScalar* lir) {
   Register elements = ToRegister(lir->elements());
+  Register temp = ToTempRegisterOrInvalid(lir->temp0());
   const LAllocation* value = lir->value();
 
   const MStoreUnboxedScalar* mir = lir->mir();
 
   Scalar::Type writeType = mir->writeType();
 
+  LiveRegisterSet volatileRegs;
+  if (MacroAssembler::StoreRequiresCall(writeType)) {
+    volatileRegs = liveVolatileRegs(lir);
+  }
+
   if (lir->index()->isConstant()) {
     Address dest = ToAddress(elements, lir->index(), writeType);
-    StoreToTypedArray(masm, writeType, value, dest);
+    StoreToTypedArray(masm, writeType, value, dest, temp, volatileRegs);
   } else {
     BaseIndex dest(elements, ToRegister(lir->index()),
                    ScaleFromScalarType(writeType));
-    StoreToTypedArray(masm, writeType, value, dest);
+    StoreToTypedArray(masm, writeType, value, dest, temp, volatileRegs);
   }
 }
 
@@ -18557,7 +18566,9 @@ void CodeGenerator::visitStoreDataViewElement(LStoreDataViewElement* lir) {
   if (noSwap && (!Scalar::isFloatingType(writeType) ||
                  MacroAssembler::SupportsFastUnalignedFPAccesses())) {
     if (!Scalar::isBigIntType(writeType)) {
-      StoreToTypedArray(masm, writeType, value, dest);
+      MOZ_ASSERT(writeType != Scalar::Float16);
+      StoreToTypedArray(masm, writeType, value, dest, InvalidReg,
+                        LiveRegisterSet{});
     } else {
       masm.loadBigInt64(ToRegister(value), temp64);
       masm.storeToTypedBigIntArray(writeType, temp64, dest);
@@ -18671,17 +18682,22 @@ void CodeGenerator::visitStoreTypedArrayElementHole(
 
   Register index = ToRegister(lir->index());
   const LAllocation* length = lir->length();
-  Register spectreTemp = ToTempRegisterOrInvalid(lir->temp0());
+  Register temp = ToTempRegisterOrInvalid(lir->temp0());
+
+  LiveRegisterSet volatileRegs;
+  if (MacroAssembler::StoreRequiresCall(arrayType)) {
+    volatileRegs = liveVolatileRegs(lir);
+  }
 
   Label skip;
   if (length->isRegister()) {
-    masm.spectreBoundsCheckPtr(index, ToRegister(length), spectreTemp, &skip);
+    masm.spectreBoundsCheckPtr(index, ToRegister(length), temp, &skip);
   } else {
-    masm.spectreBoundsCheckPtr(index, ToAddress(length), spectreTemp, &skip);
+    masm.spectreBoundsCheckPtr(index, ToAddress(length), temp, &skip);
   }
 
   BaseIndex dest(elements, index, ScaleFromScalarType(arrayType));
-  StoreToTypedArray(masm, arrayType, value, dest);
+  StoreToTypedArray(masm, arrayType, value, dest, temp, volatileRegs);
 
   masm.bind(&skip);
 }

diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
@@ -2169,6 +2169,7 @@
     elements: WordSized
     index: WordSized
     value: WordSized
+  num_temps: 1
   mir_op: true
 
 - name: StoreUnboxedBigInt

diff --git a/js/src/jit/Lowering.cpp b/js/src/jit/Lowering.cpp
@@ -4718,6 +4718,8 @@ void LIRGenerator::visitStoreUnboxedScalar(MStoreUnboxedScalar* ins) {
   MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
 
   if (ins->isFloatWrite()) {
+    MOZ_ASSERT_IF(ins->writeType() == Scalar::Float16,
+                  ins->value()->type() == MIRType::Float32);
     MOZ_ASSERT_IF(ins->writeType() == Scalar::Float32,
                   ins->value()->type() == MIRType::Float32);
     MOZ_ASSERT_IF(ins->writeType() == Scalar::Float64,
@@ -4761,7 +4763,19 @@ void LIRGenerator::visitStoreUnboxedScalar(MStoreUnboxedScalar* ins) {
     add(fence, ins);
   }
   if (!ins->isBigIntWrite()) {
-    add(new (alloc()) LStoreUnboxedScalar(elements, index, value), ins);
+    // We need a temp register for Float16Array.
+    LDefinition tempDef = LDefinition::BogusTemp();
+    if (ins->writeType() == Scalar::Float16) {
+      tempDef = temp();
+    }
+
+    auto* lir =
+        new (alloc()) LStoreUnboxedScalar(elements, index, value, tempDef);
+    add(lir, ins);
+
+    if (MacroAssembler::StoreRequiresCall(ins->writeType())) {
+      assignSafepoint(lir, ins);
+    }
   } else {
     add(new (alloc()) LStoreUnboxedBigInt(elements, index, value, tempInt64()),
         ins);
@@ -4818,6 +4832,8 @@ void LIRGenerator::visitStoreTypedArrayElementHole(
   MOZ_ASSERT(ins->length()->type() == MIRType::IntPtr);
 
   if (ins->isFloatWrite()) {
+    MOZ_ASSERT_IF(ins->arrayType() == Scalar::Float16,
+                  ins->value()->type() == MIRType::Float32);
     MOZ_ASSERT_IF(ins->arrayType() == Scalar::Float32,
                   ins->value()->type() == MIRType::Float32);
     MOZ_ASSERT_IF(ins->arrayType() == Scalar::Float64,
@@ -4843,11 +4859,18 @@ void LIRGenerator::visitStoreTypedArrayElementHole(
   }
 
   if (!ins->isBigIntWrite()) {
-    LDefinition spectreTemp =
-        BoundsCheckNeedsSpectreTemp() ? temp() : LDefinition::BogusTemp();
-    auto* lir = new (alloc()) LStoreTypedArrayElementHole(
-        elements, length, index, value, spectreTemp);
+    LDefinition tempDef = LDefinition::BogusTemp();
+    if (BoundsCheckNeedsSpectreTemp() || ins->arrayType() == Scalar::Float16) {
+      tempDef = temp();
+    }
+
+    auto* lir = new (alloc())
+        LStoreTypedArrayElementHole(elements, length, index, value, tempDef);
     add(lir, ins);
+
+    if (MacroAssembler::StoreRequiresCall(ins->arrayType())) {
+      assignSafepoint(lir, ins);
+    }
   } else {
     auto* lir = new (alloc()) LStoreTypedArrayElementHoleBigInt(
         elements, length, index, value, tempInt64());