diff --git a/include/dlaf/factorization/qr/t_factor_impl.h b/include/dlaf/factorization/qr/t_factor_impl.h
index 0bf1a2616c..49a9ac73b4 100644
--- a/include/dlaf/factorization/qr/t_factor_impl.h
+++ b/include/dlaf/factorization/qr/t_factor_impl.h
@@ -237,6 +237,9 @@ void QR_Tfactor::call(matrix::Panel&
   if (hh_panel.getWidth() == 0)
     return;
 
+  const SizeType bs = hh_panel.parentDistribution().blockSize().rows();
+  const SizeType offset_lc = (bs - hh_panel.tile_size_of_local_head().rows());
+
   matrix::ReadWriteTileSender t_local = Helpers::set0(std::move(t));
 
   // Note:
@@ -257,15 +260,15 @@
   // 1st step: compute the column partial result `t`
   // First we compute the matrix vector multiplication for each column
   // -tau(j) . V(j:, 0:j)* . V(j:, j)
-  for (const auto& i_lc : hh_panel.iteratorLocal()) {
+  for (const auto& v_i : hh_panel.iteratorLocal()) {
     const SizeType first_row_tile =
-        (i_lc.row() - hh_panel.rangeStartLocal()) * hh_panel.parentDistribution().tile_size().rows();
+        std::max<SizeType>(0, (v_i.row() - hh_panel.rangeStartLocal()) * bs - offset_lc);
 
     // Note:
     // Since we are writing always on the same t, the gemv are serialized
     // A possible solution to this would be to have multiple places where to store partial
     // results, and then locally reduce them just before the reduce over ranks
-    t_local = Helpers::gemvColumnT(first_row_tile, hh_panel.read(i_lc), taus, std::move(t_local));
+    t_local = Helpers::gemvColumnT(first_row_tile, hh_panel.read(v_i), taus, std::move(t_local));
   }
 
   // 2nd step: compute the T factor, by performing the last step on each column
diff --git a/include/dlaf/matrix/panel.h b/include/dlaf/matrix/panel.h
index 498ed60697..16b79476d0 100644
--- a/include/dlaf/matrix/panel.h
+++ b/include/dlaf/matrix/panel.h
@@ -304,6 +304,10 @@ struct Panel {
     has_been_used_ = false;
   }
 
+  TileElementSize tile_size_of_local_head() const {
+    return tileSize(LocalTileIndex(coord, rangeStartLocal()));
+  }
+
 protected:
   using ReadWriteSenderType = typename BaseT::ReadWriteSenderType;
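
For reference, a minimal standalone sketch (not part of the diff) of the index arithmetic introduced above, assuming a hypothetical block size of 4 with the local head tile shortened to 3 rows; the plain counter `i` stands in for `v_i.row() - hh_panel.rangeStartLocal()`, and `SizeType` is assumed to be a signed 64-bit index type:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

using SizeType = std::int64_t;  // assumption: signed 64-bit index type

int main() {
  // Hypothetical values: block size 4, local head tile shortened to 3 rows
  // (e.g. the panel starts one row inside a block).
  const SizeType bs = 4;                      // parentDistribution().blockSize().rows()
  const SizeType head_rows = 3;               // tile_size_of_local_head().rows()
  const SizeType offset_lc = bs - head_rows;  // = 1

  // Row offset (within the panel) of the first row of each local tile:
  // the head tile starts at 0, every following tile is shifted back by offset_lc.
  for (SizeType i = 0; i < 3; ++i) {          // i ~ v_i.row() - rangeStartLocal()
    const SizeType first_row_tile = std::max<SizeType>(0, i * bs - offset_lc);
    std::cout << "tile " << i << " -> first_row_tile = " << first_row_tile << '\n';
  }
  // Prints 0, 3, 7; with a full head tile (offset_lc == 0) it would be 0, 4, 8.
}
```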