diff --git a/README.md b/README.md index be1fcee2..01a59855 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ It sits between the application and a 'worker' SPARSE library, marshalling inputs into the backend library and marshalling results back to the application. hipSPARSELt exports an interface that does not require the client to change, regardless of the chosen backend. Currently, hipSPARSELt supports -[rocSPARSELt](library/src/hcc_detial/rocsparselt) and [NVIDIA CUDA cuSPARSELt v0.4](https://docs.nvidia.com/cuda/cusparselt) +[rocSPARSELt](library/src/hcc_detial/rocsparselt) and [NVIDIA CUDA cuSPARSELt v0.6.3](https://docs.nvidia.com/cuda/cusparselt) as backends. > [!NOTE] @@ -87,7 +87,7 @@ so it may prompt you for a password. * Add kernel selection and genroator, used to provide the appropriate solution for the specific problem. * CUDA - * Support cusparseLt v0.4 + * Support cuSPARSELt v0.6.3 ## Documentation diff --git a/docs/reference/data-type-support.rst b/docs/reference/data-type-support.rst index f1a51311..9ca84122 100644 --- a/docs/reference/data-type-support.rst +++ b/docs/reference/data-type-support.rst @@ -26,14 +26,14 @@ Data type support - ✅ * - float8 - - HIP_R_8F_E4M3_FNUZ - - ❌ + - HIP_R_8F_E4M3 - ❌ + - ✅ * - bfloat8 - - HIP_R_8F_E5M2_FNUZ - - ❌ + - HIP_R_8F_E5M2 - ❌ + - ✅ * - int16 - Not Supported @@ -53,7 +53,7 @@ Data type support - int32 - Not Supported - ❌ - - ❌ + - ✅ * - tensorfloat32 - Not Supported @@ -120,12 +120,12 @@ Data type support - tensorfloat32 - Not Supported - ❌ - - ✅ + - ❌ * - float32 - HIPSPARSELT_COMPUTE_32F - ✅ - - ❌ + - ✅ * - float64 - Not Supported @@ -135,14 +135,23 @@ Data type support * List of supported compute types at specific input and output types: ..
csv-table:: - :header: "Input", "Output", "Compute type", "Backend" + :header: "Input A/B", "Input C", "Output", "Compute type", "Backend" - "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F", "HIP" - "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "HIP" - "HIP_R_8I", "HIP_R_8I", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA" - "HIP_R_8I", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA" - "HIP_R_8I", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA" - "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_16F", "CUDA" - "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_16F", "CUDA" - "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_TF32", "CUDA" - "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_TF32_FAST", "CUDA" + "HIP_R_32F", "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_16F", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F", "HIP / CUDA" + "HIP_R_16F", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_16F", "CUDA" + "HIP_R_16BF", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "HIP / CUDA" + "HIP_R_8I", "HIP_R_8I", "HIP_R_8I", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA" + "HIP_R_8I", "HIP_R_32I", "HIP_R_32I", "HIPSPARSELT_COMPUTE_32I", "CUDA" + "HIP_R_8I", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA" + "HIP_R_8I", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA" + "HIP_R_8F_E4M3", "HIP_R_16F", "HIP_R_8F_E4M3", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E4M3", "HIP_R_16BF", "HIP_R_8F_E4M3", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E4M3", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E4M3", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E4M3", "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E5M2", "HIP_R_16F", "HIP_R_8F_E5M2", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E5M2", "HIP_R_16BF", "HIP_R_8F_E5M2", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E5M2", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F",
"CUDA" + "HIP_R_8F_E5M2", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "CUDA" + "HIP_R_8F_E5M2", "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_32F", "CUDA" diff --git a/docs/reference/supported-functions.rst b/docs/reference/supported-functions.rst index c35c529b..3e9dc27c 100644 --- a/docs/reference/supported-functions.rst +++ b/docs/reference/supported-functions.rst @@ -48,4 +48,4 @@ ROCm & CUDA supported functions * CUDA - * Support cuSPARSELt v0.4 + * Support cuSPARSELt v0.6.3