Fix LowerTriangularMask.to() under torch.inference_mode

ghstack-source-id: e6b798c2743aaed8b02b6a55a3d3e85deb45cf38
Pull Request resolved: fairinternal/xformers#1165
__original_commit__ = fairinternal/xformers@083e3a4
facebookresearch · Jul 26, 2024 · 3610a54 · 3610a54
1 parent 0b9cb70
commit 3610a54
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 1 deletion.
diff --git a/tests/test_mem_eff_attention.py b/tests/test_mem_eff_attention.py
@@ -1649,10 +1649,16 @@ def _test_to_copy(attn_bias: torch.Tensor) -> None:
     attn_bias = fmha.attn_bias.LowerTriangularMask().to("cpu")
     _test_to_copy(attn_bias)
 
+    with torch.inference_mode():
+        _test_to_copy(attn_bias)
+
     tensor_bias = torch.tensor([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]])
     attn_bias = fmha.attn_bias.LowerTriangularMaskWithTensorBias(tensor_bias).to("cpu")
     _test_to_copy(attn_bias)
 
+    with torch.inference_mode():
+        _test_to_copy(attn_bias)
+
 
 def _kv_heads_label(kv_heads: Optional[int]) -> str:
     if kv_heads is None:

diff --git a/xformers/ops/fmha/attn_bias.py b/xformers/ops/fmha/attn_bias.py
@@ -1568,8 +1568,9 @@ def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
             torch.ops.aten.clone,
             torch.ops.aten.detach,
             torch.ops.aten._to_copy,
+            torch.ops.aten.to,
         ]:
-            return cls(_subtensor=func(args[0]._subtensor, **kwargs))
+            return cls(_subtensor=func(args[0]._subtensor, *args[1:], **kwargs))
         return NotImplemented
 
     def __tensor_flatten__(self):
@@ -1669,6 +1670,7 @@ def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
             torch.ops.aten.clone,
             torch.ops.aten.detach,
             torch.ops.aten._to_copy,
+            torch.ops.aten.to,
         ]:
             output = func(
                 *[a._subtensor if isinstance(a, cls) else a for a in args],