Add floor op
Differential Revision: D54332190
henryhu6 authored and facebook-github-bot committed Feb 28, 2024
1 parent a99c753 commit 559e983
Showing 7 changed files with 106 additions and 0 deletions.
11 changes: 11 additions & 0 deletions fx2ait/fx2ait/converters/ait_converters.py
@@ -155,6 +155,17 @@ def acc_ops_floor_div(
    return create_binary_op(FuncEnum.FLOOR_DIV, args, kwargs, name)


@ait_converter(acc_ops.floor)
def acc_ops_floor(
    target: Target,
    args: Tuple[Argument, ...],
    kwargs: Dict[str, Argument],
    name: str,
) -> ConverterOutput:
    input_val = kwargs["input"]
    return elementwise(FuncEnum.FLOOR)(input_val)


@ait_converter(acc_ops.add)
def acc_ops_add(
    target: Target,
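
As a hedged illustration of where this converter fires (not part of the commit): when fx2ait traces a model whose forward calls torch.floor, the acc tracer normalizes that call to an acc_ops.floor node, and acc_ops_floor above lowers it to an AIT elementwise(FuncEnum.FLOOR) op. The module below is hypothetical and only shows the PyTorch side.

# Hypothetical PyTorch module whose floor call would be routed through
# acc_ops.floor -> acc_ops_floor during fx2ait lowering.
import torch

class FloorModule(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.floor(x)

# Reference output on the eager side; the lowered AIT module is expected
# to match this within tolerance (see the unit test below).
y = FloorModule()(torch.randn(4, 8))
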
2 changes: 2 additions & 0 deletions fx2ait/fx2ait/converters/utils.py
@@ -164,6 +164,8 @@ def get_python_op_from_ait_constant_elementwise_op(
        return math.sqrt
    elif op_type == FuncEnum.FLOOR_DIV:
        return operator.floordiv
    elif op_type == FuncEnum.FLOOR:
        return math.floor
    else:
        raise RuntimeError(f"{op_type} is not supported yet!")

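
get_python_op_from_ait_constant_elementwise_op returns a plain Python callable that the converter utilities can apply when an elementwise operand is a Python constant rather than a tensor; the new branch hands back math.floor. A minimal sketch of that behaviour, independent of the rest of the stack:

# Sketch only: the FLOOR branch of the helper returns math.floor, so a
# constant operand can be evaluated eagerly in Python.
import math

py_op = math.floor  # what the helper returns for FuncEnum.FLOOR
assert py_op(3.7) == 3
assert py_op(-0.5) == -1
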
7 changes: 7 additions & 0 deletions python/aitemplate/backend/backend_spec.py
@@ -312,6 +312,13 @@ class GPUBackendSpec(BackendSpec):
"bfloat16": "floor_div",
"bfloat16_2": "floor_div",
},
FuncEnum.FLOOR: {
"float": "floor",
"half": "floor",
"half2": "floor",
"bfloat16": "floor",
"bfloat16_2": "floor",
},
FuncEnum.CELU: {
"float": "fcelu",
"half": "hcelu",
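
The table above maps each FuncEnum to the name of the device function emitted in generated CUDA source for each backend dtype string; for FLOOR every dtype resolves to the floor overloads added to custom_math.cuh below. A simplified stand-in for that lookup (names and structure here are assumptions, only the dtype -> "floor" mapping mirrors the diff):

# Simplified stand-in for the backend-spec lookup; the real table is a
# class attribute on GPUBackendSpec.
from enum import Enum

class Func(Enum):  # local stand-in, not AITemplate's FuncEnum
    FLOOR = 30

FUNC_NAME_TABLE = {
    Func.FLOOR: {
        "float": "floor",
        "half": "floor",
        "half2": "floor",
        "bfloat16": "floor",
        "bfloat16_2": "floor",
    },
}

def device_func_name(func: Func, dtype: str) -> str:
    return FUNC_NAME_TABLE[func][dtype]

assert device_func_name(Func.FLOOR, "half2") == "floor"
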
27 changes: 27 additions & 0 deletions python/aitemplate/backend/cuda/elementwise/custom_math.cuh
@@ -1010,7 +1010,34 @@ __device__ half2 floor_div(const half2 a, const half2 b) {
__device__ bfloat16_2 floor_div(const bfloat16_2 a, const bfloat16_2 b) {
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)
  return bfloat16_2(floor_div(a.x, b.x), floor_div(a.y, b.y));
#else
  NOT_IMPLEMENTED();
#endif
}

__device__ float floor(const float a) {
  return floorf(a);
}

__device__ half floor(const half a) {
  return hfloor(a);
}

__device__ bfloat16 floor(const bfloat16 a) {
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)
  return hfloor(a);
#else
  NOT_IMPLEMENTED();
#endif
}

__device__ half2 floor(const half2 a) {
  return half2(floor(a.x), floor(a.y));
}

__device__ bfloat16_2 floor(const bfloat16_2 a) {
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)
  return bfloat16_2(floor(a.x), floor(a.y));
#else
  NOT_IMPLEMENTED();
#endif
1 change: 1 addition & 0 deletions python/aitemplate/compiler/ops/common/epilogue.py
@@ -65,3 +65,4 @@ class FuncEnum(Enum):
    SOFTSIGN = 27
    FLOOR_DIV = 28
    CELU = 29
    FLOOR = 30
4 changes: 4 additions & 0 deletions python/aitemplate/compiler/ops/common/math.py
@@ -117,3 +117,7 @@ def floor_div(tensor: Any) -> Tensor:

def celu(tensor: Any) -> Tensor:
return OP_REGISTRY.get("CELU")(tensor)


def floor(tensor: Any) -> Tensor:
return OP_REGISTRY.get("FLOOR")(tensor)
54 changes: 54 additions & 0 deletions tests/unittest/ops/test_activation.py
@@ -127,6 +127,38 @@ def _test_floor_div(
        module.run_with_tensors([x1_pt], [x2])
        torch.testing.assert_close(x2, x2_pt, atol=1e-2, rtol=1e-2)

    def _test_floor(
        self,
        input_size,
        test_name="floor",
        copy_op=False,
        dtype="float16",
    ):
        assert len(input_size) == 2
        X1 = Tensor(
            shape=[IntImm(input_size[0]), IntImm(input_size[1])],
            dtype=dtype,
            name="input0",
            is_input=True,
        )
        X2_op = ops.elementwise(FuncEnum.FLOOR)

        if copy_op:
            X2_op = ops.elementwise(**X2_op._get_op_attributes())
        X2 = X2_op(X1)
        X2._attrs["is_output"] = True
        X2._attrs["name"] = "output0"

        target = detect_target()
        module = compile_model(X2, target, "./tmp", f"{test_name}_{dtype}")

        x1_pt = get_random_torch_tensor(input_size, dtype)
        x2_pt = torch.floor(x1_pt)

        x2 = torch.empty_like(x2_pt)
        module.run_with_tensors([x1_pt], [x2])
        torch.testing.assert_close(x2, x2_pt, atol=1e-2, rtol=1e-2)

    def _test_hardtanh(
        self,
        input_size,
@@ -816,6 +848,28 @@ def test_floor_div(self, dtype):
            dtype=dtype,
        )

    @parameterized.expand(
        **filter_test_cases_by_params(
            {
                TestEnv.CUDA_LESS_THAN_SM80: [("float16"), ("float32")],
                TestEnv.CUDA_SM80: [("bfloat16")],
                TestEnv.ROCM: [("float16")],
            }
        )
    )
    def test_floor(self, dtype):
        self._test_floor(
            [511, 511],
            test_name="test_floor",
            dtype=dtype,
        )
        self._test_floor(
            [1024, 1024],
            test_name="test_floor_copy_op",
            copy_op=True,
            dtype=dtype,
        )

    @parameterized.expand(
        **filter_test_cases_by_params(
            {
