From bef03d674273d049bffba9a538c2b917a80150fb Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Fri, 15 Dec 2023 15:01:11 -0800 Subject: [PATCH 1/4] [HotFix][atamazov] multiple undefined behavior discovered with -fsanitize=undefined in DEV builds (#2609) * fix-issue-2602(01) Fix for smoke_miopendriver_gemm * Do not print output parameters in MIOPEN_LOG_FUNCTION calls. --------- Co-authored-by: atamazov --- driver/gemm_driver.hpp | 10 +++++---- src/activ_api.cpp | 2 +- src/api/find2_0_commons.cpp | 10 ++++----- src/convolution_api.cpp | 35 ++++++++++++++--------------- src/ctc_api.cpp | 12 +++------- src/dropout_api.cpp | 9 +++----- src/fused_api.cpp | 4 ++-- src/include/miopen/gemm_v2.hpp | 2 +- src/lrn_api.cpp | 4 ++-- src/pooling_api.cpp | 19 +++++++--------- src/reducetensor_api.cpp | 11 +++------- src/rnn_api.cpp | 40 +++++++++++++--------------------- src/sum_api.cpp | 2 +- src/tensor_api.cpp | 8 +++---- 14 files changed, 70 insertions(+), 98 deletions(-) diff --git a/driver/gemm_driver.hpp b/driver/gemm_driver.hpp index f464d89270..12b615f405 100644 --- a/driver/gemm_driver.hpp +++ b/driver/gemm_driver.hpp @@ -143,7 +143,8 @@ class GemmDriver : public Driver T alpha, beta; - miopen::GemmDescriptor gemm_desc; + miopen::GemmDescriptor gemm_desc = { + false, false, false, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.0f, 0.0f, miopenFloat, false}; }; template @@ -198,13 +199,13 @@ int GemmDriver::GetandSetData() gemm_desc.a_cast_type = data_type; gemm_desc.b_cast_type = data_type; - gemm_desc.isColMajor = inflags.GetValueInt("isColMajor"); + gemm_desc.isColMajor = inflags.GetValueInt("isColMajor") != 0; gemm_desc.m = inflags.GetValueInt("a_h"); gemm_desc.k = inflags.GetValueInt("a_w"); gemm_desc.n = inflags.GetValueInt("b_w"); - gemm_desc.transA = inflags.GetValueInt("transA"); - gemm_desc.transB = inflags.GetValueInt("transB"); + gemm_desc.transA = inflags.GetValueInt("transA") != 0; + gemm_desc.transB = inflags.GetValueInt("transB") != 0; gemm_desc.alpha = inflags.GetValueDouble("alpha"); gemm_desc.beta = inflags.GetValueDouble("beta"); @@ -225,6 +226,7 @@ int GemmDriver::GetandSetData() gemm_desc.strideB = gemm_desc.k * gemm_desc.n; gemm_desc.strideC = gemm_desc.m * gemm_desc.n; + gemm_desc.deterministic = false; return (0); } diff --git a/src/activ_api.cpp b/src/activ_api.cpp index 8fe0373d63..05ccdbe708 100644 --- a/src/activ_api.cpp +++ b/src/activ_api.cpp @@ -60,7 +60,7 @@ extern "C" miopenStatus_t miopenGetActivationDescriptor(miopenActivationDescript double* activGamma) { - MIOPEN_LOG_FUNCTION(activDesc, mode, activAlpha, activBeta, activGamma); + MIOPEN_LOG_FUNCTION(activDesc); return miopen::try_([&] { *mode = miopen::deref(activDesc).GetMode(); *activAlpha = miopen::deref(activDesc).GetAlpha(); diff --git a/src/api/find2_0_commons.cpp b/src/api/find2_0_commons.cpp index 5428edf3a0..ce203197a5 100644 --- a/src/api/find2_0_commons.cpp +++ b/src/api/find2_0_commons.cpp @@ -354,7 +354,7 @@ miopenStatus_t miopenSaveSolution(miopenSolution_t solution, char* data) miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t* size) { - MIOPEN_LOG_FUNCTION(solution, size); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { if(size == nullptr) @@ -374,7 +374,7 @@ miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t* size) miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution, size_t* workspaceSize) { - MIOPEN_LOG_FUNCTION(solution, workspaceSize); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { const auto& solution_deref = 
miopen::deref(solution); @@ -384,7 +384,7 @@ miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution, size_t* miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float* time) { - MIOPEN_LOG_FUNCTION(solution, time); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { const auto& solution_deref = miopen::deref(solution); @@ -394,7 +394,7 @@ miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float* time) miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution, uint64_t* solverId) { - MIOPEN_LOG_FUNCTION(solution, solverId); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { const auto& solution_deref = miopen::deref(solution); @@ -404,7 +404,7 @@ miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution, uint64_t* so miopenStatus_t miopenGetSolverIdConvAlgorithm(uint64_t solverId, miopenConvAlgorithm_t* result) { - MIOPEN_LOG_FUNCTION(solverId, result); + MIOPEN_LOG_FUNCTION(solverId); return miopen::try_([&] { const auto id_deref = miopen::solver::Id{solverId}; diff --git a/src/convolution_api.cpp b/src/convolution_api.cpp index 6599bfd787..6929512e05 100644 --- a/src/convolution_api.cpp +++ b/src/convolution_api.cpp @@ -166,7 +166,7 @@ extern "C" miopenStatus_t miopenInitConvolutionNdDescriptor(miopenConvolutionDes extern "C" miopenStatus_t miopenGetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc, int* groupCount) { - MIOPEN_LOG_FUNCTION(convDesc, groupCount); + MIOPEN_LOG_FUNCTION(convDesc); return miopen::try_([&] { miopen::deref(groupCount) = miopen::deref(convDesc).group_count; }); } @@ -254,7 +254,7 @@ extern "C" miopenStatus_t miopenGetConvolutionDescriptor(miopenConvolutionDescri int* dilation_h, int* dilation_w) { - MIOPEN_LOG_FUNCTION(convDesc, c_mode, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w); + MIOPEN_LOG_FUNCTION(convDesc); return miopen::try_([&] { if(miopen::deref(convDesc).GetSpatialDimension() != 2) { @@ -279,8 +279,7 @@ extern "C" miopenStatus_t miopenGetConvolutionNdDescriptor(miopenConvolutionDesc int* dilationA, miopenConvolutionMode_t* c_mode) { - MIOPEN_LOG_FUNCTION( - convDesc, requestedSpatialDim, spatialDim, padA, strideA, dilationA, c_mode); + MIOPEN_LOG_FUNCTION(convDesc, requestedSpatialDim); return miopen::try_([&] { int spatial_dim = miopen::deref(convDesc).GetSpatialDimension(); if(spatial_dim < requestedSpatialDim) @@ -305,7 +304,7 @@ extern "C" miopenStatus_t miopenGetConvolutionNdDescriptor(miopenConvolutionDesc extern "C" miopenStatus_t miopenGetConvolutionSpatialDim(miopenConvolutionDescriptor_t convDesc, int* spatialDim) { - MIOPEN_LOG_FUNCTION(convDesc, spatialDim); + MIOPEN_LOG_FUNCTION(convDesc); return miopen::try_( [&] { miopen::deref(spatialDim) = miopen::deref(convDesc).GetSpatialDimension(); }); } @@ -319,7 +318,7 @@ miopenGetConvolutionForwardOutputDim(miopenConvolutionDescriptor_t convDesc, int* h, int* w) { - MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc, n, c, h, w); + MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc); return miopen::try_([&] { if(miopen::deref(convDesc).GetSpatialDimension() != 2) { @@ -340,7 +339,7 @@ miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t convDesc, int* nDim, int* outputTensorDimA) { - MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc, nDim, outputTensorDimA); + MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc); return miopen::try_([&] { auto out_desc = miopen::deref(convDesc).GetForwardOutputTensor( miopen::deref(inputTensorDesc), 
miopen::deref(filterDesc)); @@ -369,7 +368,7 @@ miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -690,7 +689,7 @@ miopenConvolutionForwardGetSolutionWorkspaceSize(miopenHandle_t handle, const uint64_t solution_id, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc, solution_id, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc, solution_id); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) { @@ -806,7 +805,7 @@ miopenConvolutionBackwardDataGetSolution(miopenHandle_t handle, size_t* solutionCount, miopenConvSolution_t* solutions) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, maxSolutionCount, solutionCount); + MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, maxSolutionCount); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -829,7 +828,7 @@ miopenConvolutionBackwardDataGetSolutionWorkspaceSize(miopenHandle_t handle, const uint64_t solution_id, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, solution_id, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, solution_id); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) { @@ -924,7 +923,7 @@ miopenConvolutionBackwardWeightsGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t dwDesc, size_t* solutionCount) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, solutionCount); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -944,7 +943,7 @@ miopenConvolutionBackwardWeightsGetSolution(miopenHandle_t handle, size_t* solutionCount, miopenConvSolution_t* solutions) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, maxSolutionCount, solutionCount); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, maxSolutionCount); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -967,7 +966,7 @@ extern "C" miopenStatus_t miopenConvolutionBackwardWeightsGetSolutionWorkspaceSi const uint64_t solution_id, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, solution_id, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, solution_id); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) { @@ -1208,8 +1207,7 @@ miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dxDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -1226,8 +1224,7 @@ miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dwDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem 
= ProblemDescription{}; @@ -1378,7 +1375,7 @@ extern "C" miopenStatus_t miopenGetConvolutionAttribute(miopenConvolutionDescrip const miopenConvolutionAttrib_t attr, int* const value) { - MIOPEN_LOG_FUNCTION(convDesc, attr, value); + MIOPEN_LOG_FUNCTION(convDesc, attr); return miopen::try_( [&] { miopen::deref(value) = miopen::deref(convDesc).attribute.Get(attr); }); } diff --git a/src/ctc_api.cpp b/src/ctc_api.cpp index 5746912a92..a82e713f00 100644 --- a/src/ctc_api.cpp +++ b/src/ctc_api.cpp @@ -48,7 +48,7 @@ extern "C" miopenStatus_t miopenGetCTCLossDescriptor(miopenCTCLossDescriptor_t c int* blank_label_id = nullptr, bool* apply_softmax_layer = nullptr) { - MIOPEN_LOG_FUNCTION(ctcLossDesc, dataType, blank_label_id, apply_softmax_layer); + MIOPEN_LOG_FUNCTION(ctcLossDesc); return miopen::try_([&] { miopen::deref(dataType) = miopen::deref(ctcLossDesc).dataType; if(blank_label_id != nullptr) @@ -82,14 +82,8 @@ miopenGetCTCLossWorkspaceSize(miopenHandle_t handle, const miopenCTCLossDescriptor_t ctcLossDesc, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(probsDesc, - gradientsDesc, - labels, - labelLengths, - inputLengths, - algo, - ctcLossDesc, - workSpaceSize); + MIOPEN_LOG_FUNCTION( + probsDesc, gradientsDesc, labels, labelLengths, inputLengths, algo, ctcLossDesc); return miopen::try_([&] { miopen::deref(workSpaceSize) = miopen::deref(ctcLossDesc) diff --git a/src/dropout_api.cpp b/src/dropout_api.cpp index 6cd17381fd..2b50f7bf80 100644 --- a/src/dropout_api.cpp +++ b/src/dropout_api.cpp @@ -39,7 +39,6 @@ extern "C" miopenStatus_t miopenCreateDropoutDescriptor(miopenDropoutDescriptor_ extern "C" miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc) { - MIOPEN_LOG_FUNCTION(dropoutDesc); return miopen::try_([&] { miopen_destroy_object(dropoutDesc); }); } @@ -47,8 +46,7 @@ extern "C" miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor extern "C" miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDescriptor_t xDesc, size_t* reserveSpaceSizeInBytes) { - - MIOPEN_LOG_FUNCTION(xDesc, reserveSpaceSizeInBytes); + MIOPEN_LOG_FUNCTION(xDesc); return miopen::try_([&] { miopen::deref(reserveSpaceSizeInBytes) = miopen::deref(xDesc).GetElementSize() * sizeof(bool); @@ -58,8 +56,7 @@ extern "C" miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDes extern "C" miopenStatus_t miopenDropoutGetStatesSize(miopenHandle_t handle, size_t* stateSizeInBytes) { - - MIOPEN_LOG_FUNCTION(stateSizeInBytes); + MIOPEN_LOG_FUNCTION(handle); return miopen::try_([&] { miopen::deref(stateSizeInBytes) = std::min(size_t(MAX_PRNG_STATE), miopen::deref(handle).GetImage3dMaxWidth()) * @@ -76,7 +73,7 @@ extern "C" miopenStatus_t miopenGetDropoutDescriptor(miopenDropoutDescriptor_t d bool* state_evo, miopenRNGType_t* rng_mode) { - MIOPEN_LOG_FUNCTION(dropoutDesc, dropout, states, seed, use_mask, state_evo); + MIOPEN_LOG_FUNCTION(dropoutDesc); return miopen::try_([&] { miopen::deref(dropout) = miopen::deref(dropoutDesc).dropout; miopen::deref(states) = &(miopen::deref(dropoutDesc).pstates); diff --git a/src/fused_api.cpp b/src/fused_api.cpp index 0ee0cebc99..c2e2612e3d 100644 --- a/src/fused_api.cpp +++ b/src/fused_api.cpp @@ -87,7 +87,7 @@ miopenFusionPlanGetWorkSpaceSize(miopenHandle_t handle, size_t* workSpaceSize, miopenConvFwdAlgorithm_t algo) { - MIOPEN_LOG_FUNCTION(handle, fusePlanDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, fusePlanDesc, algo); miopenStatus_t res = miopenStatusUnknownError; miopen::try_([&] { size_t sz; @@ -103,7 +103,7 @@ 
miopenFusionPlanConvolutionGetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc, int* returnedAlgoCount, miopenConvFwdAlgorithm_t* returnedAlgos) { - MIOPEN_LOG_FUNCTION(fusePlanDesc, requestAlgoCount, returnedAlgoCount, returnedAlgos); + MIOPEN_LOG_FUNCTION(fusePlanDesc, requestAlgoCount); miopenStatus_t res = miopenStatusUnknownError; miopen::try_([&] { int cnt = 0; diff --git a/src/include/miopen/gemm_v2.hpp b/src/include/miopen/gemm_v2.hpp index 9300ffa29b..4d7bb8e153 100644 --- a/src/include/miopen/gemm_v2.hpp +++ b/src/include/miopen/gemm_v2.hpp @@ -77,7 +77,7 @@ struct GemmDescriptor miopenDataType_t a_cast_type; miopenDataType_t b_cast_type; ConvolutionAttribute conv_attributes; - GemmDescriptor() {} + GemmDescriptor() = delete; GemmDescriptor(bool isColMajor_, bool transA_, bool transB_, diff --git a/src/lrn_api.cpp b/src/lrn_api.cpp index d922d2c489..fa7f966992 100644 --- a/src/lrn_api.cpp +++ b/src/lrn_api.cpp @@ -56,8 +56,7 @@ extern "C" miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrn double* lrnBeta, double* lrnK) { - - MIOPEN_LOG_FUNCTION(lrnDesc, mode, lrnN, lrnAlpha, lrnBeta, lrnK); + MIOPEN_LOG_FUNCTION(lrnDesc); return miopen::try_([&] { *mode = miopen::deref(lrnDesc).GetMode(); *lrnN = miopen::deref(lrnDesc).GetN(); @@ -70,6 +69,7 @@ extern "C" miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrn extern "C" miopenStatus_t miopenLRNGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t* workSpaceSize) { + MIOPEN_LOG_FUNCTION(yDesc); // TODO: Supporting size 4 bytes only return miopen::try_([&] { diff --git a/src/pooling_api.cpp b/src/pooling_api.cpp index a2c792f8bb..321194ffe5 100644 --- a/src/pooling_api.cpp +++ b/src/pooling_api.cpp @@ -137,7 +137,7 @@ extern "C" miopenStatus_t miopenSetPoolingIndexType(miopenPoolingDescriptor_t po extern "C" miopenStatus_t miopenGetPoolingIndexType(miopenPoolingDescriptor_t poolDesc, miopenIndexType_t* index_type) { - MIOPEN_LOG_FUNCTION(poolDesc, index_type); + MIOPEN_LOG_FUNCTION(poolDesc); return miopen::try_([&] { *index_type = miopen::deref(poolDesc).GetIndexType(); }); } @@ -153,7 +153,7 @@ extern "C" miopenStatus_t miopenGetPoolingWorkSpaceIndexMode(miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t* workspace_index) { - MIOPEN_LOG_FUNCTION(poolDesc, workspace_index); + MIOPEN_LOG_FUNCTION(poolDesc); return miopen::try_( [&] { *workspace_index = miopen::deref(poolDesc).GetWorkspaceIndexMode(); }); } @@ -189,8 +189,7 @@ extern "C" miopenStatus_t miopenGet2dPoolingDescriptor(const miopenPoolingDescri int* stride_w) { - MIOPEN_LOG_FUNCTION( - poolDesc, mode, windowHeight, windowWidth, pad_h, pad_w, stride_h, stride_w); + MIOPEN_LOG_FUNCTION(poolDesc); return miopen::try_([&] { miopen::deref(mode) = miopen::deref(poolDesc).mode; std::tie(miopen::deref(windowHeight), miopen::deref(windowWidth)) = @@ -224,7 +223,7 @@ extern "C" miopenStatus_t miopenGetNdPoolingDescriptor(miopenPoolingDescriptor_t int* padA, int* stridesA) { - + MIOPEN_LOG_FUNCTION(poolDesc, nbDimsRequested); return miopen::try_([&] { if(mode != nullptr) { @@ -262,7 +261,7 @@ miopenGetPoolingNdForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, int* tensorDimArr) { - MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc, dims, tensorDimArr); + MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc, dims); return miopen::try_([&] { miopen::deref(poolDesc).GetForwardOutputDimNd( miopen::deref(tensorDesc), dims, tensorDimArr); @@ -278,7 +277,7 @@ miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, int* w) { 
- MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc, n, c, h, w); + MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc); return miopen::try_([&] { miopen::tie_deref(n, c, h, w) = miopen::deref(poolDesc).GetForwardOutputDim(miopen::deref(tensorDesc)); @@ -289,8 +288,7 @@ miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, extern "C" miopenStatus_t miopenPoolingGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(yDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(yDesc); return miopen::try_([&] { auto len = miopen::deref(yDesc).GetLengths(); size_t sz = std::accumulate(len.begin(), len.end(), size_t{1}, std::multiplies()); @@ -302,8 +300,7 @@ extern "C" miopenStatus_t miopenPoolingGetWorkSpaceSizeV2(const miopenPoolingDes const miopenTensorDescriptor_t yDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(poolDesc, yDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(poolDesc, yDesc); return miopen::try_( [&] { *workSpaceSize = miopen::deref(poolDesc).GetWorkSpaceSize(miopen::deref(yDesc)); }); } diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index 39efcf9eca..fc43a3ee1f 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -131,12 +131,7 @@ miopenGetReduceTensorDescriptor(const miopenReduceTensorDescriptor_t reduceTenso miopenReduceTensorIndices_t* reduceTensorIndices, miopenIndicesType_t* reduceTensorIndicesType) { - MIOPEN_LOG_FUNCTION(reduceTensorDesc, - reduceTensorOp, - reduceTensorCompType, - reduceTensorNanOpt, - reduceTensorIndices, - reduceTensorIndicesType); + MIOPEN_LOG_FUNCTION(reduceTensorDesc); return miopen::try_([&] { miopen::deref(reduceTensorOp) = miopen::deref(reduceTensorDesc).reduceTensorOp_; miopen::deref(reduceTensorCompType) = miopen::deref(reduceTensorDesc).reduceTensorCompType_; @@ -154,7 +149,7 @@ miopenGetReductionIndicesSize(miopenHandle_t handle, const miopenTensorDescriptor_t cDesc, size_t* sizeInBytes) { - MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); + MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc); return miopen::try_([&] { miopen::deref(sizeInBytes) = @@ -171,7 +166,7 @@ miopenGetReductionWorkspaceSize(miopenHandle_t handle, size_t* sizeInBytes) { - MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); + MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc); return miopen::try_([&] { miopen::deref(sizeInBytes) = miopen::deref(reduceTensorDesc) diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 3f83c16826..4ad183f2bf 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -125,8 +125,7 @@ extern "C" miopenStatus_t miopenGetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, int* layer) { - MIOPEN_LOG_FUNCTION( - rnnDesc, rnnMode, algoMode, inputMode, dirMode, biasMode, hiddenSize, layer); + MIOPEN_LOG_FUNCTION(rnnDesc); return miopen::try_([&] { if(rnnMode != nullptr) { @@ -170,16 +169,7 @@ extern "C" miopenStatus_t miopenGetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDes miopenRNNAlgo_t* algoMode, miopenDataType_t* dataType) { - MIOPEN_LOG_FUNCTION(rnnDesc, - hiddenSize, - layer, - dropoutDesc, - inputMode, - dirMode, - rnnMode, - biasMode, - algoMode, - dataType); + MIOPEN_LOG_FUNCTION(rnnDesc); return miopen::try_([&] { if(rnnMode != nullptr) { @@ -265,7 +255,7 @@ extern "C" miopenStatus_t miopenGetRNNWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, 
xDesc); miopen::c_array_view xDescArray{xDesc, size_t(sequenceLen)}; return miopen::try_([&] { miopen::deref(numBytes) = @@ -279,7 +269,7 @@ extern "C" miopenStatus_t miopenGetRNNTrainingReserveSize(miopenHandle_t handle, const miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, xDesc); miopen::c_array_view xDescArray{xDesc, size_t(sequenceLen)}; return miopen::try_([&] { miopen::deref(numBytes) = @@ -294,7 +284,7 @@ extern "C" miopenStatus_t miopenGetRNNTempSpaceSizes(miopenHandle_t handle, size_t* workSpaceSize, size_t* reserveSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, workSpaceSize, reserveSpaceSize); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, fwdMode); return miopen::try_([&] { if(workSpaceSize != nullptr) @@ -317,7 +307,7 @@ extern "C" miopenStatus_t miopenGetRNNParamsDescriptor(miopenHandle_t handle, miopenTensorDescriptor_t wDesc, miopenDataType_t dtype) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, wDesc, dtype); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, dtype); return miopen::try_([&] { miopen::deref(rnnDesc).GetParamsDescriptor( miopen::deref(handle), miopen::deref(xDesc), miopen::deref(wDesc), dtype); @@ -330,7 +320,7 @@ extern "C" miopenStatus_t miopenGetRNNParamsSize(miopenHandle_t handle, size_t* numBytes, miopenDataType_t dtype) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, numBytes, dtype); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, dtype); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetParamsSize( miopen::deref(handle), miopen::deref(xDesc), dtype); @@ -343,7 +333,7 @@ extern "C" miopenStatus_t miopenGetRNNInputTensorSize(miopenHandle_t handle, miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, seqLen, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, seqLen, xDesc); miopen::c_array_view xDescArray{xDesc, size_t(seqLen)}; return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetRNNInputSuperTensorSize( @@ -357,7 +347,7 @@ extern "C" miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle, miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, seqLen, xDesc); miopen::c_array_view xDescArray{xDesc, size_t(seqLen)}; return miopen::try_([&] { miopen::deref(numBytes) = @@ -372,7 +362,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle, const int paramID, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, paramID, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, paramID); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetLayerParamSize( miopen::deref(handle), layer, miopen::deref(xDesc), paramID); @@ -385,7 +375,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, const int biasID, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, biasID, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, biasID); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetLayerBiasSize(miopen::deref(handle), layer, biasID); @@ -402,7 +392,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, miopenTensorDescriptor_t paramDesc, void* layerParam) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, paramID, paramDesc, layerParam); + 
MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, paramID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerParam(miopen::deref(handle), layer, @@ -425,7 +415,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, miopenTensorDescriptor_t biasDesc, void* layerBias) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, biasID, biasDesc, layerBias); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, biasID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerBias(miopen::deref(handle), layer, @@ -445,7 +435,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t rnn miopenTensorDescriptor_t paramDesc, size_t* layerParamOffset) { - MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, paramID, paramDesc, layerParamOffset); + MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, paramID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerParamOffset( layer, miopen::deref(xDesc), paramID, miopen::deref(paramDesc), layerParamOffset); @@ -459,7 +449,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t rnnD miopenTensorDescriptor_t biasDesc, size_t* layerBiasOffset) { - MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, biasID, biasDesc, layerBiasOffset); + MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, biasID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerBiasOffset( layer, miopen::deref(xDesc), biasID, miopen::deref(biasDesc), layerBiasOffset); diff --git a/src/sum_api.cpp b/src/sum_api.cpp index de3744f306..bac24383a3 100644 --- a/src/sum_api.cpp +++ b/src/sum_api.cpp @@ -89,7 +89,7 @@ extern "C" miopenStatus_t miopenGetSumWorkspaceSize(miopenHandle_t handle, size_t* sizeInBytes) { - MIOPEN_LOG_FUNCTION(handle, xDesc, dim, yDesc, sizeInBytes); + MIOPEN_LOG_FUNCTION(handle, xDesc, dim, yDesc); return miopen::try_([&] { miopen::deref(sizeInBytes) = miopen::GetSumWorkspaceSize( diff --git a/src/tensor_api.cpp b/src/tensor_api.cpp index 307395f59d..9f742a77aa 100644 --- a/src/tensor_api.cpp +++ b/src/tensor_api.cpp @@ -104,7 +104,7 @@ extern "C" miopenStatus_t miopenGet4dTensorDescriptor(miopenTensorDescriptor_t t int* wStride) { - MIOPEN_LOG_FUNCTION(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { miopen::deref(dataType) = miopen::deref(tensorDesc).GetType(); miopen::tie_deref(n, c, h, w) = miopen::tien<4>(miopen::deref(tensorDesc).GetLengths()); @@ -234,7 +234,7 @@ extern "C" miopenStatus_t miopenGetTensorNumBytes(miopenTensorDescriptor_t tenso size_t* numBytes) { - MIOPEN_LOG_FUNCTION(tensorDesc, numBytes); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(tensorDesc).GetNumBytes(); }); } @@ -247,7 +247,7 @@ int miopenGetTensorDescriptorElementSize(miopenTensorDescriptor_t tensorDesc) extern "C" miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t tensorDesc, int* size) { - MIOPEN_LOG_FUNCTION(tensorDesc, size); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { miopen::deref(size) = miopen::deref(tensorDesc).GetSize(); }); } @@ -257,7 +257,7 @@ extern "C" miopenStatus_t miopenGetTensorDescriptor(miopenTensorDescriptor_t ten int* stridesA) { - MIOPEN_LOG_FUNCTION(tensorDesc, dataType, dimsA, stridesA); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { if(dataType != nullptr) { From b17d08055137563a0e5b286cf4b587cd6a9a6ec7 Mon Sep 17 00:00:00 2001 From: JD Date: Fri, 15 Dec 2023 19:14:15 -0600 Subject: [PATCH 2/4] 
[hipRTC] resolve symbol issues by explicitly link with hipRTC (#2612) * explicitly link with hipRTC * Update formatting * Consider MIOPEN_USE_HIPRTC=Off * Clean up --------- Co-authored-by: Jun Liu --- CMakeLists.txt | 4 +--- src/CMakeLists.txt | 8 ++++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cdf6829c4e..2fd18fce13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -381,9 +381,7 @@ if(MIOPEN_USE_HIPRTC) if(NOT MIOPEN_USE_COMGR) message(FATAL_ERROR "HIPRTC can be used only together with COMGR") endif() - if(WIN32) - find_package(hiprtc REQUIRED) - endif() + find_package(hiprtc REQUIRED) message(STATUS "Build with HIPRTC") endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4ed162ae92..1d5548db7e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -761,8 +761,12 @@ if( MIOPEN_BACKEND STREQUAL "OpenCL") elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP") target_link_libraries( MIOpen PRIVATE hip::device ) target_link_libraries( MIOpen INTERFACE hip::host ) - if(MIOPEN_USE_HIPRTC AND WIN32) - target_link_libraries( MIOpen PRIVATE hiprtc::hiprtc ) + if(MIOPEN_USE_HIPRTC) + if(WIN32) + target_link_libraries( MIOpen PRIVATE hiprtc::hiprtc ) + else() + target_link_libraries( MIOpen PRIVATE hiprtc) + endif() endif() if(ENABLE_HIP_WORKAROUNDS) # Workaround hip not setting its usage requirements correctly From 62a05348f511ebd8378ef00596aa2d3d7da80179 Mon Sep 17 00:00:00 2001 From: amberhassaan Date: Sat, 16 Dec 2023 14:34:46 -0500 Subject: [PATCH 3/4] Standardize workspace abstraction (#2524) --- src/ocl/convolutionocl.cpp | 21 +++ test/conv_common.hpp | 220 +++++++++--------------- test/ctc.cpp | 8 +- test/find_2_conv.cpp | 23 +-- test/find_db.cpp | 28 ++- test/gru_common.hpp | 104 +++++------ test/gtest/conv_api_strided_tensors.cpp | 22 +-- test/gtest/solver_bwd.hpp | 14 +- test/gtest/solver_fwd.hpp | 14 +- test/gtest/solver_wrw.hpp | 14 +- test/lstm_common.hpp | 173 +++++++++++-------- test/main.cpp | 114 +++--------- test/pooling_common.hpp | 21 +-- test/reduce_test.cpp | 77 ++++----- test/rnn_vanilla_common.hpp | 69 ++++---- test/tensor_reorder.cpp | 91 +++------- test/workspace.hpp | 163 ++++++++++++++++++ 17 files changed, 595 insertions(+), 581 deletions(-) create mode 100644 test/workspace.hpp diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 73e17380ae..94251cc6d0 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -131,6 +131,18 @@ static inline void ValidateGroupCount(const TensorDescriptor& x, MIOPEN_THROW(miopenStatusBadParm, "Invalid group number"); } +static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) +{ + + [[maybe_unused]] bool x = (workSpace != nullptr); + [[maybe_unused]] bool y = (workSpaceSize != 0); + + assert(((x && y) || (!x && !y)) && "workspace pointer and size don't match. 
Either both should " + "be zero or both should be non-zero"); + + /// \todo could add a check here that workSpace points to GPU memory +} + static Invoker PrepareInvoker(ExecutionContext ctx, const conv::ProblemDescription& problem, const NetworkConfig& config, @@ -260,6 +272,7 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle, bool exhaustiveSearch) const { MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(x == nullptr || w == nullptr || y == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -495,6 +508,7 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle, size_t workSpaceSize) const { MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; ValidateTensors(tensors); @@ -812,6 +826,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle, const solver::Id solver_id) const { MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; ValidateTensors(tensors); @@ -846,6 +861,7 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, bool exhaustiveSearch) const { MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(dx == nullptr || w == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -944,6 +960,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle, size_t workSpaceSize) const { MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; @@ -1015,6 +1032,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle, solver::Id solver_id) const { MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; ValidateTensors(tensors); @@ -1055,6 +1073,7 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, bool exhaustiveSearch) const { MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(x == nullptr || dw == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -1151,6 +1170,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle, size_t workSpaceSize) const { MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); ValidateAlphaBeta(alpha, beta); @@ -1218,6 +1238,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle, solver::Id solver_id) const { MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); diff --git 
a/test/conv_common.hpp b/test/conv_common.hpp index efaf4d8f0a..e387e98b5b 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -25,6 +25,7 @@ *******************************************************************************/ #pragma once #include "test.hpp" +#include "workspace.hpp" #include #include #include @@ -330,13 +331,11 @@ struct conv_base EXPECT_EQUAL(miopenStatusSuccess, miopenGetSolutionWorkspaceSize(solution, &workspace_size)); - const auto workspace_dev = workspace_size != 0 - ? get_handle().Write(std::vector(workspace_size)) - : nullptr; + Workspace wspace{workspace_size}; - EXPECT_EQUAL(miopenStatusSuccess, - miopenRunSolution( - handle, solution, 3, arguments, workspace_dev.get(), workspace_size)); + EXPECT_EQUAL( + miopenStatusSuccess, + miopenRunSolution(handle, solution, 3, arguments, wspace.ptr(), wspace.size())); } const auto& solution_deref = miopen::deref(solutions.front()); @@ -596,19 +595,6 @@ struct verify_forward_conv : conv_base return rout; } - void resize_workspace(miopen::Handle& h, - const std::size_t sz, - std::vector& ws, - miopen::Allocator::ManageDataPtr& ws_dev) const - { - ws_dev.reset(); - if(sz > 0) - { - ws.resize(sz); - ws_dev = h.Write(ws); - } - } - tensor gpu() { auto&& handle = get_handle(); @@ -629,8 +615,7 @@ struct verify_forward_conv : conv_base bool fallback_path_taken = false; std::size_t count = 0; - std::vector ws; - miopen::Allocator::ManageDataPtr ws_dev = nullptr; + Workspace wspace{}; const auto ctx = ExecutionContext{&handle}; const auto problem = ConvProblemDescription{ @@ -649,8 +634,7 @@ struct verify_forward_conv : conv_base { int ret_algo_count; miopenConvAlgoPerf_t perf; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - resize_workspace(handle, workspace_size, ws, ws_dev); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); filter.FindConvBwdDataAlgorithm(handle, input.desc, @@ -662,8 +646,8 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - ws_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -696,7 +680,7 @@ struct verify_forward_conv : conv_base << " != " << ws_size << std::endl; } } - resize_workspace(handle, selected.workspace_size, ws, ws_dev); + wspace.resize(selected.workspace_size); filter.CompileSolution(ctx, problem, selected.solution_id); @@ -707,8 +691,8 @@ struct verify_forward_conv : conv_base wei_dev.get(), rout.desc, out_dev.get(), - ws_dev.get(), - selected.workspace_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } else @@ -717,8 +701,7 @@ struct verify_forward_conv : conv_base { int ret_algo_count; miopenConvAlgoPerf_t perf; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - resize_workspace(handle, workspace_size, ws, ws_dev); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); filter.FindConvFwdAlgorithm(handle, input.desc, @@ -730,8 +713,8 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - ws_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -765,7 +748,7 @@ struct verify_forward_conv : conv_base << " != " << ws_size << std::endl; } } - resize_workspace(handle, selected.workspace_size, ws, ws_dev); + wspace.resize(selected.workspace_size); filter.CompileSolution(ctx, problem, selected.solution_id); @@ -776,8 +759,8 @@ struct verify_forward_conv : conv_base in_dev.get(), rout.desc, out_dev.get(), - ws_dev.get(), - selected.workspace_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } 
break; @@ -832,9 +815,7 @@ struct verify_forward_conv : conv_base if(api == ConvApi::Find_1_0) { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -851,15 +832,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -872,8 +851,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); /// \ref read_solver_name auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken); @@ -904,9 +883,7 @@ struct verify_forward_conv : conv_base { if(api == ConvApi::Find_1_0) { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -925,15 +902,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardData(handle, @@ -946,8 +921,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } else { @@ -961,15 +936,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -982,8 +955,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } /// \ref read_solver_name @@ -1118,6 +1091,8 @@ struct verify_backward_conv : conv_base auto wei_dev = handle.Write(weights.data); auto in_dev = handle.Write(rinput.data); + Workspace wspace{}; + miopenConvSolution_t selected; bool fallback_path_taken = false; std::size_t count = 0; @@ -1133,9 +1108,7 @@ struct verify_backward_conv : conv_base switch(api) { case ConvApi::Immediate: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1154,8 +1127,8 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -1183,18 +1156,14 @@ struct verify_backward_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetForwardSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize( handle, weights.desc, out.desc, rinput.desc, selected.solution_id); filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionForwardImmediate(handle, @@ -1204,8 +1173,8 @@ struct verify_backward_conv : conv_base out_dev.get(), rinput.desc, in_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } else @@ -1222,8 +1191,8 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -1250,18 +1219,14 @@ struct verify_backward_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetBackwardSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetBackwardSolutionWorkspaceSize( handle, out.desc, weights.desc, rinput.desc, selected.solution_id); filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionBackwardImmediate(handle, @@ -1271,16 +1236,14 @@ struct verify_backward_conv : conv_base wei_dev.get(), rinput.desc, in_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } break; } case ConvApi::Find_1_0: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1299,15 +1262,13 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -1320,8 +1281,8 @@ struct verify_backward_conv : conv_base &beta, rinput.desc, in_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } else { @@ -1335,15 +1296,13 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardData(handle, @@ -1356,8 +1315,8 @@ struct verify_backward_conv : conv_base &beta, rinput.desc, in_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } /// \ref read_solver_name @@ -1487,6 +1446,7 @@ struct verify_backward_weights_conv : conv_base auto out_dev = handle.Write(out.data); auto wei_dev = handle.Write(rweights.data); auto in_dev = handle.Write(input.data); + Workspace wspace{}; miopenConvSolution_t selected; bool fallback_path_taken = false; @@ -1503,9 +1463,7 @@ struct verify_backward_weights_conv : conv_base switch(api) { case ConvApi::Immediate: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1523,8 +1481,8 @@ struct verify_backward_weights_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -1551,9 +1509,7 @@ struct verify_backward_weights_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetWrwSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetWrwSolutionWorkspaceSize( handle, filter.mode == miopenTranspose ? input.desc : out.desc, filter.mode == miopenTranspose ? out.desc : input.desc, @@ -1562,11 +1518,9 @@ struct verify_backward_weights_conv : conv_base filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionWrwImmediate( @@ -1577,16 +1531,14 @@ struct verify_backward_weights_conv : conv_base filter.mode == miopenTranspose ? out_dev.get() : in_dev.get(), rweights.desc, wei_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); break; } case ConvApi::Find_1_0: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1603,15 +1555,13 @@ struct verify_backward_weights_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardWeights( @@ -1625,8 +1575,8 @@ struct verify_backward_weights_conv : conv_base &beta, rweights.desc, wei_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); /// \ref read_solver_name const auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken); @@ -1778,9 +1728,7 @@ struct verify_forward_conv_int8 : conv_base wei_vpad_dev.get()); } - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1797,8 +1745,8 @@ struct verify_forward_conv_int8 : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -1826,9 +1774,7 @@ struct verify_forward_conv_int8 : conv_base }); auto selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = + [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize(handle, (is_transform ? weight_vpad_desc : weights.desc), (is_transform ? input_vpad_desc : input.desc), @@ -1837,11 +1783,9 @@ struct verify_forward_conv_int8 : conv_base filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionForwardImmediate(handle, @@ -1851,8 +1795,8 @@ struct verify_forward_conv_int8 : conv_base (is_transform ? 
in_vpad_dev.get() : in_dev.get()), rout.desc, out_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); if(count != 0) diff --git a/test/ctc.cpp b/test/ctc.cpp index d812cef801..1c759220f2 100644 --- a/test/ctc.cpp +++ b/test/ctc.cpp @@ -29,6 +29,7 @@ #include "tensor_holder.hpp" #include "test.hpp" #include "verify.hpp" +#include "workspace.hpp" #include "rnn_util.hpp" #include "random.hpp" #include @@ -651,8 +652,7 @@ struct verify_ctcloss inputLengths.data(), miopenCTCLossAlgo_t(0)); - auto workSpace = tensor{workSpaceSize / sizeof(T)}; - auto workSpace_dev = handle.Write(workSpace.data); + Workspace wspace{workSpaceSize}; auto losses_gpu = losses; auto grads_gpu = grads; @@ -671,8 +671,8 @@ struct verify_ctcloss grads.desc, grads_dev.get(), miopenCTCLossAlgo_t(0), - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); losses_gpu.data = handle.Read(losses_dev, losses_gpu.data.size()); grads_gpu.data = handle.Read(grads_dev, grads_gpu.data.size()); diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp index a89ce942a7..6e636e265f 100644 --- a/test/find_2_conv.cpp +++ b/test/find_2_conv.cpp @@ -27,6 +27,7 @@ #include "test.hpp" #include "driver.hpp" #include "get_handle.hpp" +#include "workspace.hpp" #include @@ -210,13 +211,11 @@ struct Find2Test : test_driver } const auto workspace_size = std::min(workspace_limit, workspace_max); - workspace_dev = workspace_size != 0 - ? miopen::deref(handle).Write(std::vector(workspace_size)) - : nullptr; + Workspace wspace{workspace_size}; - EXPECT_EQUAL(miopenSetFindOptionPreallocatedWorkspace( - options, workspace_dev.get(), workspace_size), - miopenStatusSuccess); + EXPECT_EQUAL( + miopenSetFindOptionPreallocatedWorkspace(options, wspace.ptr(), wspace.size()), + miopenStatusSuccess); EXPECT_EQUAL(miopenSetFindOptionPreallocatedTensor( options, miopenTensorConvolutionX, x_dev.get()), @@ -312,14 +311,11 @@ struct Find2Test : test_driver { std::cerr << "Running a solution..." << std::endl; - auto& handle_deref = get_handle(); - std::size_t workspace_size; EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size), miopenStatusSuccess); - auto workspace_dev = - workspace_size != 0 ? handle_deref.Write(std::vector(workspace_size)) : nullptr; + Workspace wspace{workspace_size}; const auto checked_run_solution = [&](miopenTensorDescriptor_t* descriptors_) { auto arguments = std::make_unique(num_arguments); @@ -331,10 +327,9 @@ struct Find2Test : test_driver arguments[i].buffer = buffers[i]; } - EXPECT_EQUAL( - miopenRunSolution( - handle, solution, 3, arguments.get(), workspace_dev.get(), workspace_size), - miopenStatusSuccess); + EXPECT_EQUAL(miopenRunSolution( + handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()), + miopenStatusSuccess); }; // Without descriptors diff --git a/test/find_db.cpp b/test/find_db.cpp index b07167f80e..47c485182e 100644 --- a/test/find_db.cpp +++ b/test/find_db.cpp @@ -27,6 +27,7 @@ #include "test.hpp" #include "driver.hpp" #include "get_handle.hpp" +#include "workspace.hpp" #include #include @@ -111,10 +112,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{y.desc, w.desc, x.desc, filter, conv::Direction::BackwardData}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -130,8 +128,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; @@ -145,10 +143,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{x.desc, w.desc, y.desc, filter, conv::Direction::Forward}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -164,8 +159,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; @@ -179,10 +174,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{ y.desc, w.desc, x.desc, filter, conv::Direction::BackwardWeights}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -198,8 +190,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 51e3c09012..4df6209e9d 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -35,6 +35,7 @@ #include "verify.hpp" #include "rnn_util.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -1963,8 +1964,8 @@ struct verify_forward_infer_gru #endif auto&& handle = get_handle(); - size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t out_sz = 0; + size_t workspace_size = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -1979,9 +1980,8 @@ struct verify_forward_infer_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; auto input_dev = handle.Write(input); @@ -1994,8 +1994,6 @@ struct verify_forward_infer_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -2027,8 +2025,8 @@ struct verify_forward_infer_gru ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_GRU_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2249,7 +2247,7 @@ struct verify_forward_train_gru auto&& handle = get_handle(); size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t workspace_size = 0; size_t reserveSpaceSize = 0; std::vector inputCPPDescs; @@ -2265,12 +2263,14 @@ struct verify_forward_train_gru hiddenSize * ((dirMode != 0) ? 
2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; + miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); + reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) & ~(sizeof(T) - 1); + assert(reserveSpaceSize % sizeof(T) == 0); + Workspace rspace{reserveSpaceSize}; auto input_dev = handle.Write(input); @@ -2284,9 +2284,6 @@ struct verify_forward_train_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -2318,10 +2315,10 @@ struct verify_forward_train_gru ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_GRU_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2331,10 +2328,9 @@ struct verify_forward_train_gru } #endif - auto retSet = std::make_tuple( - handle.Read(output_dev, output.size()), - (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T))); + auto retSet = std::make_tuple(handle.Read(output_dev, output.size()), + (nohy ? initHidden : handle.Read(hy_dev, hy.size())), + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2484,7 +2480,7 @@ struct verify_backward_data_gru int bi = dirMode != 0 ? 2 : 1; int hy_h = hiddenSize; int bi_stride = bi * hy_h; - size_t workSpaceSize; + size_t workspace_size; std::vector inputCPPDescs; std::vector inputDescs; @@ -2494,8 +2490,8 @@ struct verify_backward_data_gru // Outputs ---------- size_t in_sz = 0; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, inputDescs.data(), &in_sz); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + std::vector workSpace(workspace_size / sizeof(T)); std::vector dx(in_sz / sizeof(T)); std::vector dhx(initHidden.size()); @@ -2562,8 +2558,7 @@ struct verify_backward_data_gru auto&& handle = get_handle(); - size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t out_sz = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -2578,15 +2573,17 @@ struct verify_backward_data_gru hiddenSize * ((dirMode != 0) ? 
2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); - auto yin_dev = handle.Write(yin); - auto dyin_dev = handle.Write(dy); - auto reserveSpace_dev = handle.Write(reserveSpace); - auto weights_dev = handle.Write(weights); + auto yin_dev = handle.Write(yin); + auto dyin_dev = handle.Write(dy); + auto weights_dev = handle.Write(weights); + + Workspace rspace{}; + rspace.Write(reserveSpace); std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); @@ -2633,15 +2630,15 @@ struct verify_backward_data_gru ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), - handle.Read(reserveSpace_dev, reserveSpace.size()), - handle.Read(workSpace_dev, workSpace.size())); + rspace.Read>(), + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2840,8 +2837,11 @@ struct verify_backward_weights_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace wspace{}; + wspace.Write(workSpace); + Workspace rspace{}; + rspace.Write(reserveSpace); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -2869,10 +2869,10 @@ struct verify_backward_weights_gru dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -3123,10 +3123,10 @@ struct gru_basic_driver : test_driver size_t reserveSpaceSize; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - size_t workSpaceSize; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + 2 * out_sz + + size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + 2 * out_sz + (in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) + (nodhx ? 0 : hx_sz) + (nodhy ? 
0 : hx_sz)) * sizeof(T); diff --git a/test/gtest/conv_api_strided_tensors.cpp b/test/gtest/conv_api_strided_tensors.cpp index 9a2876b3f0..d4e49201f4 100644 --- a/test/gtest/conv_api_strided_tensors.cpp +++ b/test/gtest/conv_api_strided_tensors.cpp @@ -29,6 +29,7 @@ #include #include #include "platform.hpp" +#include "../workspace.hpp" #define MIOPEN_CHECK_RET(val) ASSERT_EQ(val, miopenStatusSuccess) @@ -66,10 +67,12 @@ class ConvStridedTensors : public ::testing::Test MIOPEN_CHECK_RET(miopenSetConvolutionGroupCount(conv_descr, 1)); // Workspace + size_t sz = 0; MIOPEN_CHECK_RET(miopenConvolutionForwardGetWorkSpaceSize( - handle, filter_descr, input_descr, conv_descr, output_descr, &workspace_size)); + handle, filter_descr, input_descr, conv_descr, output_descr, &sz)); // Data + wspace.resize(sz); h_input.resize(input_size); h_filter.resize(filter_size); h_output.resize(output_size); @@ -125,7 +128,7 @@ class ConvStridedTensors : public ::testing::Test std::vector dilation = {1, 1, 1}; // Workspace - size_t workspace_size; + Workspace wspace{}; // Data const size_t input_size = input_dims[0] * input_strides[0]; @@ -143,10 +146,9 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) { auto device = Device(handle); - auto d_workspace = device.Malloc(workspace_size); - auto d_input = device.Malloc(input_bytes); - auto d_filter = device.Malloc(filter_bytes); - auto d_output = device.Malloc(output_bytes); + auto d_input = device.Malloc(input_bytes); + auto d_filter = device.Malloc(filter_bytes); + auto d_output = device.Malloc(output_bytes); std::fill_n(h_input.begin(), h_input.size(), 1.f); ASSERT_TRUE(d_input.CopyToDevice(h_input.data(), input_bytes)); @@ -168,8 +170,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) sizeof(perf_results) / sizeof(perf_results[0]), &perf_results_count, perf_results, - d_workspace.Data(), - workspace_size, + wspace.ptr(), + wspace.size(), true), miopenStatusSuccess); ASSERT_GT(perf_results_count, 0); @@ -189,8 +191,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) &beta, output_descr, d_output.Data(), - d_workspace.Data(), - workspace_size), + wspace.ptr(), + wspace.size()), miopenStatusSuccess); ASSERT_TRUE(device.Synchronize()); } diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp index cb55a5951b..6511800ce6 100644 --- a/test/gtest/solver_bwd.hpp +++ b/test/gtest/solver_bwd.hpp @@ -40,6 +40,7 @@ #include "conv_tensor_gen.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvBwdSolverTest @@ -76,15 +77,11 @@ struct ConvBwdSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::DataInvokeParams{tensors, - workspace_dev.get(), - workspace_size, - conv_desc.attribute.gfx90aFp16alt.GetBwd()}; + const auto invoke_params = miopen::conv::DataInvokeParams{ + tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); ASSERT_TRUE(sol.Succeeded()); @@ -177,8 +174,7 @@ struct ConvBwdSolverTest miopen::Allocator::ManageDataPtr in_dev; miopen::Allocator::ManageDataPtr wei_dev; miopen::Allocator::ManageDataPtr out_dev; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; }; diff --git 
a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp index 88fa9a9c55..20b16fcc32 100644 --- a/test/gtest/solver_fwd.hpp +++ b/test/gtest/solver_fwd.hpp @@ -38,6 +38,7 @@ #include "conv_test_base.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvFwdSolverTest @@ -77,15 +78,11 @@ struct ConvFwdSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::DataInvokeParams{tensors, - workspace_dev.get(), - workspace_size, - this->conv_desc.attribute.gfx90aFp16alt.GetFwd()}; + const auto invoke_params = miopen::conv::DataInvokeParams{ + tensors, wspace.ptr(), wspace.size(), this->conv_desc.attribute.gfx90aFp16alt.GetFwd()}; // auto sol = solv.GetSolution(ctx, problem); // This is complicated due to the split between tunable and non-tunable solvers @@ -116,8 +113,7 @@ struct ConvFwdSolverTest } ConvTestCase conv_config; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; miopenTensorLayout_t tensor_layout; diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp index dde92e2071..dcf8311d83 100644 --- a/test/gtest/solver_wrw.hpp +++ b/test/gtest/solver_wrw.hpp @@ -40,6 +40,7 @@ #include "conv_tensor_gen.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvWrwSolverTest @@ -76,15 +77,11 @@ struct ConvWrwSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::WrWInvokeParams{tensors, - workspace_dev.get(), - workspace_size, - conv_desc.attribute.gfx90aFp16alt.GetBwd()}; + const auto invoke_params = miopen::conv::WrWInvokeParams{ + tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); ASSERT_TRUE(sol.Succeeded()); @@ -179,8 +176,7 @@ struct ConvWrwSolverTest miopen::Allocator::ManageDataPtr in_dev; miopen::Allocator::ManageDataPtr wei_dev; miopen::Allocator::ManageDataPtr out_dev; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; }; diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index a09aab4209..d2b7d1a077 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -27,6 +27,7 @@ #ifndef GUARD_MIOPEN_TEST_LSTM_COMMON_HPP #define GUARD_MIOPEN_TEST_LSTM_COMMON_HPP +#include "workspace.hpp" #include "driver.hpp" #include "dropout_util.hpp" #include "get_handle.hpp" @@ -96,6 +97,8 @@ struct verify_backward_data_lstm std::vector initHidden; // HX std::vector initCell; // CX std::vector weights; + std::vector& RSVgpu; + std::vector& RSVcpu; std::vector batch_seq; int hiddenSize; int seqLength; @@ -115,8 +118,6 @@ struct verify_backward_data_lstm bool nodcx; bool use_dropout; bool use_seqPadding; - typename std::vector::iterator RSVgpu; - typename std::vector::iterator RSVcpu; verify_backward_data_lstm(miopenRNNDescriptor_t pRD, const std::vector& py, @@ -153,6 +154,8 @@ struct verify_backward_data_lstm initHidden(phx), initCell(pcx), weights(pW), + RSVgpu(pRSVgpu), + RSVcpu(pRSVcpu), 
batch_seq(pBS), hiddenSize(pHS), seqLength(pS), @@ -171,9 +174,7 @@ struct verify_backward_data_lstm nodhx(pnodhx), nodcx(pnodcx), use_dropout(puse_dropout), - use_seqPadding(puse_seqPadding), - RSVgpu(pRSVgpu.begin()), - RSVcpu(pRSVcpu.begin()) + use_seqPadding(puse_seqPadding) { if(!nohx) initHidden = phx; // this may be intentionally a nullptr @@ -248,6 +249,8 @@ struct verify_backward_weights_lstm std::vector input; // Y std::vector dy; // dY std::vector initHidden; // HX + std::vector reserveSpace_gpu; + std::vector reserveSpace_cpu; std::vector workSpace; std::vector batch_seq; int weightSize; @@ -264,8 +267,6 @@ struct verify_backward_weights_lstm bool nohx; bool use_dropout; bool use_seqPadding; - typename std::vector reserveSpace_gpu; - typename std::vector reserveSpace_cpu; verify_backward_weights_lstm(miopenRNNDescriptor_t pRD, const std::vector& px, @@ -291,6 +292,8 @@ struct verify_backward_weights_lstm : input(px), dy(pdy), initHidden(phx), + reserveSpace_gpu(pRSVgpu), + reserveSpace_cpu(pRSVcpu), workSpace(pWS), batch_seq(pBS), weightSize(pW), @@ -306,9 +309,7 @@ struct verify_backward_weights_lstm realHiddenSize(pHXZ), nohx(pnohx), use_dropout(puse_dropout), - use_seqPadding(puse_seqPadding), - reserveSpace_gpu(pRSVgpu), - reserveSpace_cpu(pRSVcpu) + use_seqPadding(puse_seqPadding) { if(!nohx) initHidden = phx; // this may be intentionally a nullptr @@ -533,8 +534,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto&& handle = get_handle(); - size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t out_sz = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -549,9 +549,10 @@ struct verify_forward_infer_lstm : verify_forward_lstm hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - std::vector workSpace(workSpaceSize / sizeof(T)); + Workspace wspace{workspace_size}; auto input_dev = handle.Write(input); @@ -565,8 +566,6 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto cy = initCell; std::fill(cy.begin(), cy.end(), 0.); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -577,6 +576,9 @@ struct verify_forward_infer_lstm : verify_forward_lstm wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); + /// \todo: fix the handle.Write() calls below because they generate + /// temporary objects that may get destroyed before the + /// miopenRNNForwardInference call happens miopenRNNForwardInference(&handle, rnnDesc, seqLength, @@ -594,8 +596,8 @@ struct verify_forward_infer_lstm : verify_forward_lstm ((nohy) ? nullptr : handle.Write(hy).get()), &hiddenDesc, ((nocy) ? 
nullptr : handle.Write(cy).get()), - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_LSTM_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -673,9 +675,10 @@ struct verify_forward_train_lstm : verify_forward_lstm using verify_forward_lstm::nocy; using verify_forward_lstm::use_seqPadding; + std::vector& RSVgpu; + std::vector& RSVcpu; + bool use_dropout; - typename std::vector::iterator RSVgpu; - typename std::vector::iterator RSVcpu; verify_forward_train_lstm(miopenRNNDescriptor_t pRD, const std::vector& px, @@ -700,7 +703,7 @@ struct verify_forward_train_lstm : verify_forward_lstm const bool pnocy = false, const bool puse_dropout = false, const bool puse_seqPadding = false) - : RSVgpu(pRSVgpu.begin()), RSVcpu(pRSVcpu.begin()) + : RSVgpu(pRSVgpu), RSVcpu(pRSVcpu) { input = px; initHidden = phx; @@ -861,7 +864,11 @@ struct verify_forward_train_lstm : verify_forward_lstm ChangeDataPadding(*packed_output, output, batch_seq, batch_seq[0], out_h, true); } - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu); + if(reserveSpace.size() != RSVcpu.size()) + { + std::abort(); + } + std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin()); auto retSet = std::make_tuple( output, (nohy ? initHidden : hiddenState), (nocy ? initCell : cellState)); @@ -908,17 +915,15 @@ struct verify_forward_train_lstm : verify_forward_lstm std::fill(output.begin(), output.end(), static_cast(0)); auto output_dev = handle.Write(output); - size_t workSpaceSize = 0; + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; + size_t reserveSpaceSize = 0; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); + Workspace rspace{reserveSpaceSize}; auto weights_dev = handle.Write(weights); @@ -957,10 +962,10 @@ struct verify_forward_train_lstm : verify_forward_lstm ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, ((nocy) ? nullptr : cy_dev.get()), - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_LSTM_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -969,9 +974,7 @@ struct verify_forward_train_lstm : verify_forward_lstm printf("GPU outdata[%d]: %f\n", i, outdata[i]); } #endif - reserveSpace = - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu); + rspace.ReadTo(RSVgpu); std::vector output_gpu = handle.Read(output_dev, output.size()); @@ -1049,7 +1052,7 @@ verify_backward_data_lstm::cpu() const int hy_h = hiddenSize; int bi_stride = bi * hy_h; int out_h = hiddenSize * ((dirMode != 0) ? 
2 : 1); - size_t workSpaceSize; + size_t workspace_size; std::vector inputCPPDescs; std::vector inputDescs; @@ -1066,8 +1069,8 @@ verify_backward_data_lstm::cpu() const true, use_seqPadding); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + std::vector workSpace(workspace_size / sizeof(T)); std::vector dx(in_sz); std::vector dhx(initHidden.size()); std::vector dcx(initHidden.size()); @@ -1086,8 +1089,11 @@ verify_backward_data_lstm::cpu() const reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T); } - std::vector reserveSpace(reserveSpaceSize); - std::copy(RSVcpu, RSVcpu + reserveSpaceSize, reserveSpace.begin()); + if(reserveSpaceSize != RSVcpu.size()) + { + std::abort(); + } + std::vector reserveSpace(RSVcpu); std::vector converted_dinput; std::vector converted_output; @@ -1119,7 +1125,7 @@ verify_backward_data_lstm::cpu() const packed_doutput = &converted_doutput; // WA - wa_workSpace.resize(workSpaceSize / sizeof(T) - (packedXInSize + packedYOutSize)); + wa_workSpace.resize(workspace_size / sizeof(T) - (packedXInSize + packedYOutSize)); wa_shifted_workSpace = &wa_workSpace; } else @@ -1186,7 +1192,7 @@ verify_backward_data_lstm::cpu() const workSpace.begin() + converted_doutput.size() + converted_dinput.size()); } - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu); + std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin()); // TODO: remove workSpace auto retSet = @@ -1210,8 +1216,6 @@ verify_backward_data_lstm::gpu() const auto&& handle = get_handle(); - size_t workSpaceSize = 0; - std::vector inputCPPDescs; std::vector inputDescs; createTensorDescArray( @@ -1225,20 +1229,37 @@ verify_backward_data_lstm::gpu() const hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); - - size_t reserveSpaceSize; + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + if(workspace_size % sizeof(T) != 0) + { + std::abort(); + } + Workspace wspace{}; + // Needed to zero out the workspace (happens in std::vector's constructor) + // or else this test fails verification when workspace is compared against the + // workspace returned by ::cpu method in this class + wspace.Write(std::vector(workspace_size / sizeof(T))); + // wspace.resize(workspace_size); + + size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - std::copy(RSVgpu, RSVgpu + reserveSpace.size(), reserveSpace.begin()); + /// \todo: fix miopenGetRNNTrainingReserveSize to return a multiple of + /// sizeof(T) + // Needed because reserveSpaceSize returned is not a multiple of sizeof(T). 
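+    // Worked example of the round-up below (illustration only, not part of the
+    // fix, and assuming sizeof(T) is a power of two): with sizeof(T) == 4 and a
+    // returned reserveSpaceSize of 10 bytes, (10 + 3) & ~3 == 12, i.e. the size
+    // is rounded up to the next multiple of sizeof(T) so the buffer holds a
+    // whole number of T elements.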
+ reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); - auto yin_dev = handle.Write(yin); - auto dyin_dev = handle.Write(dy); - auto reserveSpace_dev = handle.Write(reserveSpace); - auto weights_dev = handle.Write(weights); + if(reserveSpaceSize != (RSVgpu.size() * sizeof(T))) + { + std::abort(); + } + Workspace rspace{}; + rspace.Write(RSVgpu); + + auto yin_dev = handle.Write(yin); + auto dyin_dev = handle.Write(dy); + auto weights_dev = handle.Write(weights); std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); @@ -1284,18 +1305,18 @@ verify_backward_data_lstm::gpu() const ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, ((nodcx) ? nullptr : dcx_dev.get()), - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); - reserveSpace = handle.Read(reserveSpace_dev, reserveSpace.size()); - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu); + assert(RSVgpu.size() * sizeof(T) == rspace.size()); + rspace.ReadTo(RSVgpu); // TODO: remove workSpace auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), (nodcx ? initCell : handle.Read(dcx_dev, dcx.size())), - handle.Read(workSpace_dev, workSpace.size())); + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -1427,8 +1448,12 @@ std::vector verify_backward_weights_lstm::gpu() const hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace_gpu); + Workspace wspace{}; + wspace.Write(workSpace); + + Workspace rspace{}; + rspace.Write(reserveSpace_gpu); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -1452,10 +1477,10 @@ std::vector verify_backward_weights_lstm::gpu() const dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace_gpu.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -1732,10 +1757,10 @@ struct lstm_basic_driver : test_driver false, usePadding); - size_t workSpaceSize; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + + size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + (2 * out_sz + in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) + (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz) + (nocx ? 0 : hx_sz) + (nocy ? 0 : hx_sz) + (nodcx ? 0 : hx_sz) + (nodcy ? 0 : hx_sz)) * @@ -1815,7 +1840,7 @@ struct lstm_basic_driver : test_driver #if(MIO_LSTM_TEST_DEBUG > 0) printf("Running backward weights LSTM.\n"); printf("reserve sz: %d, workSpace sz: %d, weight sz: %d\n", - reserveSpaceBwdData.size(), + rsvcpu.size(), workSpaceBwdData.size(), wei_sz); fflush(nullptr); diff --git a/test/main.cpp b/test/main.cpp index b48705754d..a1efb2eff8 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -23,8 +23,10 @@ * SOFTWARE. 
* *******************************************************************************/ +#include "get_handle.hpp" #include "test.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -37,17 +39,8 @@ struct handle_fixture { miopenHandle_t handle{}; -#if MIOPEN_BACKEND_OPENCL - cl_command_queue q{}; -#endif - handle_fixture() - { - miopenCreate(&handle); -#if MIOPEN_BACKEND_OPENCL - miopenGetStream(handle, &q); -#endif - } + handle_fixture() { miopenCreate(&handle); } ~handle_fixture() { miopenDestroy(handle); } }; @@ -167,8 +160,6 @@ struct conv_forward : output_tensor_fixture { float alpha = 1, beta = 0; - // Setup OpenCL buffers - int n, h, c, w; STATUS(miopenGet4dTensorDescriptorLengths(inputTensor, &n, &c, &h, &w)); size_t sz_in = static_cast(n) * c * h * w; @@ -182,15 +173,12 @@ struct conv_forward : output_tensor_fixture size_t sz_fwd_workspace; STATUS(miopenConvolutionForwardGetWorkSpaceSize( handle, convFilter, inputTensor, convDesc, outputTensor, &sz_fwd_workspace)); - // OCL fails to allocate zero workspace. Let's allocate small workspace instead to simplify - // subsequent code. - if(sz_fwd_workspace == 0) - sz_fwd_workspace = 256; + + Workspace wspace{sz_fwd_workspace}; std::vector in(sz_in); std::vector wei(sz_wei); std::vector out(sz_out); - std::vector fwd_workspace(sz_fwd_workspace / 4); for(size_t i = 0; i < sz_in; i++) { @@ -201,60 +189,16 @@ struct conv_forward : output_tensor_fixture wei[i] = prng::gen_A_to_B(-0.5f, 0.5f) * 0.001f; } -#if MIOPEN_BACKEND_OPENCL - - cl_context ctx; - clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, nullptr); - - cl_int status = CL_SUCCESS; - cl_mem in_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_in, nullptr, &status); - cl_mem wei_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_wei, nullptr, nullptr); - cl_mem out_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 4 * sz_out, nullptr, nullptr); - cl_mem fwd_workspace_dev = - clCreateBuffer(ctx, CL_MEM_READ_WRITE, sz_fwd_workspace, nullptr, nullptr); - - status = - clEnqueueWriteBuffer(q, in_dev, CL_TRUE, 0, 4 * sz_in, in.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer( - q, wei_dev, CL_TRUE, 0, 4 * sz_wei, wei.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer( - q, out_dev, CL_TRUE, 0, 4 * sz_out, out.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer(q, - fwd_workspace_dev, - CL_TRUE, - 0, - sz_fwd_workspace, - fwd_workspace.data(), - 0, - nullptr, - nullptr); - EXPECT(status == CL_SUCCESS); - -#elif MIOPEN_BACKEND_HIP - - void* in_dev; - void* wei_dev; - void* out_dev; - void* fwd_workspace_dev; - - EXPECT(hipMalloc(&in_dev, 4 * sz_in) == hipSuccess); - EXPECT(hipMalloc(&wei_dev, 4 * sz_wei) == hipSuccess); - EXPECT(hipMalloc(&out_dev, 4 * sz_out) == hipSuccess); - EXPECT(hipMalloc(&fwd_workspace_dev, sz_fwd_workspace) == hipSuccess); - - EXPECT(hipMemcpy(in_dev, in.data(), 4 * sz_in, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(wei_dev, wei.data(), 4 * sz_wei, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(out_dev, out.data(), 4 * sz_out, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(fwd_workspace_dev, - fwd_workspace.data(), - sz_fwd_workspace, - hipMemcpyHostToDevice) == hipSuccess); + auto& mhand = get_handle(); + + auto in_dev = mhand.Write(in); + auto wei_dev = mhand.Write(wei); + auto out_dev = mhand.Write(out); -#endif int value = 10; - STATUS(miopenSetTensor(handle, inputTensor, in_dev, &value)); + STATUS(miopenSetTensor(handle, inputTensor, in_dev.get(), 
&value)); - STATUS(miopenScaleTensor(handle, inputTensor, in_dev, &alpha)); + STATUS(miopenScaleTensor(handle, inputTensor, in_dev.get(), &alpha)); float time; @@ -276,32 +220,32 @@ struct conv_forward : output_tensor_fixture STATUS(miopenFindConvolutionForwardAlgorithm( used_handle, inputTensor, - in_dev, + in_dev.get(), convFilter, - wei_dev, + wei_dev.get(), convDesc, outputTensor, - out_dev, + out_dev.get(), 1, &ret_algo_count, &perf, - fwd_workspace_dev, - sz_fwd_workspace, + wspace.ptr(), + wspace.size(), 0)); // MD: Not performing exhaustiveSearch by default for now STATUS(miopenConvolutionForward(used_handle, &alpha, inputTensor, - in_dev, + in_dev.get(), convFilter, - wei_dev, + wei_dev.get(), convDesc, perf.fwd_algo, &beta, outputTensor, - out_dev, - fwd_workspace_dev, - sz_fwd_workspace)); + out_dev.get(), + wspace.ptr(), + wspace.size())); STATUS(miopenGetKernelTime(used_handle, &time)); @@ -318,20 +262,6 @@ struct conv_forward : output_tensor_fixture { CHECK(time == 0.0); } - -// Potential memory leak free memory at end of function -#if MIOPEN_BACKEND_OPENCL - clReleaseMemObject(in_dev); - clReleaseMemObject(wei_dev); - clReleaseMemObject(out_dev); - clReleaseMemObject(fwd_workspace_dev); - -#elif MIOPEN_BACKEND_HIP - hipFree(in_dev); - hipFree(wei_dev); - hipFree(out_dev); - hipFree(fwd_workspace_dev); -#endif } }; diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index ae55b2e133..33bcb7164f 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -46,6 +46,7 @@ #include "tensor_holder.hpp" #include "verify.hpp" #include "cpu_conv.hpp" +#include "workspace.hpp" #define TEST_PADDING_MODE 0 // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) @@ -199,9 +200,10 @@ struct verify_forward_pooling auto out = get_output_tensor(filter, input); indices.resize(out.data.size(), 0); - auto in_dev = handle.Write(input.data); - auto out_dev = handle.Create(out.data.size()); - auto workspace_dev = handle.Write(indices); + auto in_dev = handle.Write(input.data); + auto out_dev = handle.Create(out.data.size()); + Workspace wspace{}; + wspace.Write(indices); float alpha = 1, beta = 0; filter.Forward(handle, @@ -212,10 +214,10 @@ struct verify_forward_pooling out.desc, out_dev.get(), true, - workspace_dev.get(), - indices.size() * sizeof(Index)); + wspace.ptr(), + wspace.size()); - indices = handle.Read(workspace_dev, indices.size()); + indices = wspace.Read>(); out.data = handle.Read(out_dev, out.data.size()); return out; } @@ -403,9 +405,8 @@ struct verify_backward_pooling auto out_dev = handle.Write(out.data); auto din_dev = handle.Create(dinput.data.size()); - // std::vector workspace(filter.GetWorkSpaceSize(out.desc)); - // auto workspace_dev = handle.Write(workspace); - auto workspace_dev = handle.Write(indices); + Workspace wspace{}; + wspace.Write(indices); float alpha = 1, beta = 0; filter.Backward(handle, @@ -423,7 +424,7 @@ struct verify_backward_pooling // dx dinput.desc, din_dev.get(), - workspace_dev.get()); + wspace.ptr()); dinput.data = handle.Read(din_dev, dinput.data.size()); return dinput; diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index 3ca771fc64..dc92a20318 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -27,6 +27,7 @@ #include "driver.hpp" #include "test.hpp" #include "verify.hpp" +#include "workspace.hpp" #include "get_handle.hpp" #include "tensor_holder.hpp" #include "random.hpp" @@ -60,28 +61,26 @@ struct verify_reduce_with_indices miopenReduceTensorIndices_t indicesOpt; miopenIndicesType_t 
indicesType; - verify_reduce_with_indices( // NOLINT (hicpp-member-init) - const miopen::ReduceTensorDescriptor& reduce_, - const tensor& input_, - const tensor& output_, - const tensor& workspace_, - const tensor& indices_, - float alpha_, - float beta_) + verify_reduce_with_indices(const miopen::ReduceTensorDescriptor& reduce_, + const tensor& input_, + const tensor& output_, + const tensor& workspace_, + const tensor& indices_, + float alpha_, + float beta_) + : reduce(reduce_), + input(input_), + output(output_), + workspace(workspace_), + indices(indices_), + alpha(alpha_), + beta(beta_), + reduceOp(reduce.reduceTensorOp_), + compTypeVal(reduce.reduceTensorCompType_), + nanOpt(reduce.reduceTensorNanOpt_), + indicesOpt(reduce.reduceTensorIndices_), + indicesType(reduce.reduceTensorIndicesType_) { - reduce = reduce_; - input = input_; - output = output_; - workspace = workspace_; - indices = indices_; - alpha = alpha_; - beta = beta_; - - reduceOp = reduce.reduceTensorOp_; - compTypeVal = reduce.reduceTensorCompType_; - nanOpt = reduce.reduceTensorNanOpt_; - indicesOpt = reduce.reduceTensorIndices_; - indicesType = reduce.reduceTensorIndicesType_; } tensor cpu() const @@ -343,10 +342,11 @@ struct verify_reduce_with_indices auto res = output; auto res_indices = indices; - auto indices_dev = handle.Write(indices.data); + Workspace idxspace{}; + idxspace.Write(indices.data); - std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); - std::size_t indices_sizeInBytes = indices.desc.GetElementSize() * sizeof(int); + Workspace wspace{}; + wspace.Write(workspace.data); const double alpha64 = alpha; const double beta64 = beta; @@ -358,15 +358,13 @@ struct verify_reduce_with_indices ? static_cast(&beta64) : static_cast(&beta); - if(ws_sizeInBytes > 0) + if(wspace.size() > 0) { - auto workspace_dev = handle.Write(workspace.data); - reduce.ReduceTensor(get_handle(), - indices_dev.get(), - indices_sizeInBytes, - workspace_dev.get(), - ws_sizeInBytes, + idxspace.ptr(), + idxspace.size(), + wspace.ptr(), + wspace.size(), alphaPtr, input.desc, input_dev.get(), @@ -377,8 +375,8 @@ struct verify_reduce_with_indices else { reduce.ReduceTensor(get_handle(), - indices_dev.get(), - indices_sizeInBytes, + idxspace.ptr(), + idxspace.size(), nullptr, 0, alphaPtr, @@ -390,7 +388,7 @@ struct verify_reduce_with_indices }; res.data = handle.Read(output_dev, res.data.size()); - res_indices.data = handle.Read(indices_dev, res_indices.data.size()); + res_indices.data = idxspace.Read(); return (std::make_tuple(res, res_indices)); } @@ -647,7 +645,8 @@ struct verify_reduce_no_indices // replicate auto res = output; - std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); + Workspace wspace{}; + wspace.Write(workspace.data); const double alpha64 = alpha; const double beta64 = beta; @@ -659,15 +658,13 @@ struct verify_reduce_no_indices ? 
static_cast(&beta64) : static_cast(&beta); - if(ws_sizeInBytes > 0) + if(wspace.size() > 0) { - auto workspace_dev = handle.Write(workspace.data); - reduce.ReduceTensor(get_handle(), nullptr, 0, - workspace_dev.get(), - ws_sizeInBytes, + wspace.ptr(), + wspace.size(), alphaPtr, input.desc, input_dev.get(), diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index ef5c1088c3..854f682068 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -35,6 +35,7 @@ #include "verify.hpp" #include "rnn_util.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -1436,8 +1437,7 @@ struct verify_forward_infer_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); + Workspace wspace{workSpaceSize}; auto input_dev = handle.Write(input); @@ -1450,8 +1450,6 @@ struct verify_forward_infer_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; @@ -1479,8 +1477,8 @@ struct verify_forward_infer_rnn ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_RNN_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -1708,11 +1706,13 @@ struct verify_forward_train_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + Workspace wspace{workSpaceSize}; + miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); + reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); + Workspace rspace{reserveSpaceSize}; auto input_dev = handle.Write(input); @@ -1726,9 +1726,6 @@ struct verify_forward_train_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; @@ -1756,10 +1753,10 @@ struct verify_forward_train_rnn ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -1769,10 +1766,9 @@ struct verify_forward_train_rnn } #endif - auto retSet = std::make_tuple( - handle.Read(output_dev, output.size()), - (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T))); + auto retSet = std::make_tuple(handle.Read(output_dev, output.size()), + (nohy ? 
initHidden : handle.Read(hy_dev, hy.size())), + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2002,15 +1998,15 @@ struct verify_backward_data_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); + Workspace wspace{workSpaceSize}; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); auto yin_dev = handle.Write(yin); auto dyin_dev = handle.Write(dy); // auto dhyin_dev = handle.Write(dhy); - auto reserveSpace_dev = handle.Write(reserveSpace); - auto weights_dev = handle.Write(weights); + Workspace rspace{}; + rspace.Write(reserveSpace); + auto weights_dev = handle.Write(weights); // auto hx_dev = handle.Write(initHidden); std::vector hlens(3, 0); @@ -2054,15 +2050,15 @@ struct verify_backward_data_rnn ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), - handle.Read(reserveSpace_dev, reserveSpace.size()), - handle.Read(workSpace_dev, workSpace.size())); + rspace.Read>(), + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2257,8 +2253,11 @@ struct verify_backward_weights_rnn hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace wspace{}; + wspace.Write(workSpace); + Workspace rspace{}; + rspace.Write(reserveSpace); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -2283,10 +2282,10 @@ struct verify_backward_weights_rnn dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index dc1a38f508..bf40e7ee38 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -38,6 +38,8 @@ #include "test.hpp" #include "driver.hpp" #include "random.hpp" +#include "get_handle.hpp" +#include "workspace.hpp" template <> struct miopen_type : std::integral_constant @@ -286,19 +288,6 @@ bool verify_tensor(tensor& t_gpu, tensor& t_cpu) struct tensor_reorder_base_driver : test_driver { - miopenHandle_t handle{}; -#if MIOPEN_BACKEND_OPENCL - cl_command_queue q{}; -#endif - - tensor_reorder_base_driver() - { - miopenCreate(&handle); -#if MIOPEN_BACKEND_OPENCL - miopenGetStream(handle, &q); -#endif - } - ~tensor_reorder_base_driver() { miopenDestroy(handle); } static std::vector get_dim_3_size() { return {1, 9}; } static std::vector get_dim_2_size() { return {1, 9}; } @@ -363,14 +352,14 @@ struct tensor_reorder_driver : tensor_reorder_base_driver // NOLINTBEGIN(clang-analyzer-cplusplus.NewDeleteLeaks) void run() { - auto run_reorder = [this](uint32_t dim_0, - uint32_t dim_1, - uint32_t dim_2, - uint32_t dim_3, - uint32_t 
order_0, - uint32_t order_1, - uint32_t order_2, - uint32_t order_3) { + auto run_reorder = [](uint32_t dim_0, + uint32_t dim_1, + uint32_t dim_2, + uint32_t dim_3, + uint32_t order_0, + uint32_t order_1, + uint32_t order_2, + uint32_t order_3) { int tensor_sz = dim_0 * dim_1 * dim_2 * dim_3; std::vector tensor_len({static_cast(dim_0), static_cast(dim_1), @@ -392,8 +381,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver tensor t_dst_gpu(tensor_len, tensor_strides); rand_tensor_integer(t_src); + auto& handle = get_handle(); miopen::ExecutionContext ctx; - ctx.SetStream(&miopen::deref(this->handle)); + ctx.SetStream(&handle); // ctx.SetupFloats(); auto reorder_sol = MakeTensorReorderAttributes(ctx, to_miopen_data_type::get(), @@ -406,36 +396,13 @@ struct tensor_reorder_driver : tensor_reorder_base_driver order_2, order_3); EXPECT(reorder_sol != nullptr); - size_t workspace = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz - : reorder_sol->GetOutputTensorSize(); -#if MIOPEN_BACKEND_OPENCL - cl_context cl_ctx; - clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &cl_ctx, nullptr); - cl_int status = CL_SUCCESS; - cl_mem src_dev = - clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, sizeof(T) * tensor_sz, nullptr, &status); - cl_mem dst_dev = clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, workspace, nullptr, nullptr); - status |= clEnqueueWriteBuffer(q, - src_dev, - CL_TRUE, - 0, - sizeof(T) * tensor_sz, - t_src.data.data(), - 0, - nullptr, - nullptr); - EXPECT(status == CL_SUCCESS); -#elif MIOPEN_BACKEND_HIP - void* src_dev; - void* dst_dev; - EXPECT(hipMalloc(&src_dev, sizeof(T) * tensor_sz) == hipSuccess); - EXPECT(hipMalloc(&dst_dev, workspace) == hipSuccess); - EXPECT(hipMemcpy( - src_dev, t_src.data.data(), sizeof(T) * tensor_sz, hipMemcpyHostToDevice) == - hipSuccess); -#endif - const auto invoke_param = reorder_invoke_param{ - DataCast(static_cast(src_dev)), DataCast(dst_dev)}; + size_t workspace_size = reorder_sol->IsSkippable() ? 
sizeof(T) * tensor_sz + : reorder_sol->GetOutputTensorSize(); + Workspace wspace{workspace_size}; + + auto src_dev = handle.Write(t_src.data); + + const auto invoke_param = reorder_invoke_param{src_dev.get(), wspace.ptr()}; std::vector opArgs = reorder_sol->GetKernelArg(); boost::optional invoker_factory( [=](const std::vector& kernels) mutable { @@ -451,10 +418,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver }); std::vector construction_params{ reorder_sol->GetKernelInfo()}; - const auto invoker = - miopen::deref(this->handle).PrepareInvoker(*invoker_factory, construction_params); + const auto invoker = handle.PrepareInvoker(*invoker_factory, construction_params); // run gpu - invoker(miopen::deref(this->handle), invoke_param); + invoker(handle, invoke_param); // run cpu cpu_reorder::run(t_dst.data.data(), t_src.data.data(), @@ -467,18 +433,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver order_2, order_3); invoker_factory = boost::none; -#if MIOPEN_BACKEND_OPENCL - status = clEnqueueReadBuffer( - q, dst_dev, CL_TRUE, 0, workspace, t_dst_gpu.data.data(), 0, nullptr, nullptr); - EXPECT(status == CL_SUCCESS); - clReleaseMemObject(dst_dev); - clReleaseMemObject(src_dev); -#elif MIOPEN_BACKEND_HIP - EXPECT(hipMemcpy(t_dst_gpu.data.data(), dst_dev, workspace, hipMemcpyDeviceToHost) == - hipSuccess); - hipFree(dst_dev); - hipFree(src_dev); -#endif + + t_dst_gpu.data = wspace.Read(); + // we expect excact match, since use integer bool valid_result = verify_tensor(t_dst_gpu, t_dst); std::cout << "[" << reorder_str::get(order_0, order_1, order_2, order_3) << ", b" diff --git a/test/workspace.hpp b/test/workspace.hpp new file mode 100644 index 0000000000..93522b1cd4 --- /dev/null +++ b/test/workspace.hpp @@ -0,0 +1,163 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#pragma once + +#include + +#include "get_handle.hpp" + +#define HIP_CHECK(exp) \ + if((exp) != hipSuccess) \ + { \ + MIOPEN_LOG_E(#exp "failed at line: " << __LINE__ << " in file: " << __FILE__); \ + } + +class Workspace +{ + + // RAII class for hip allocations + class GPUBuffer + { + public: + GPUBuffer() = default; + + explicit GPUBuffer(size_t num_bytes) : sz_(num_bytes) + { + if(num_bytes > 0) + { + HIP_CHECK(hipMalloc(&buf_, num_bytes)); + assert(buf_ != nullptr); + } + else + { + buf_ = nullptr; + } + } + + ~GPUBuffer() { FreeBuf(); } + + void* ptr() { return buf_; } + void* ptr() const { return buf_; } + + auto size() const { return sz_; } + + GPUBuffer(const GPUBuffer&) = delete; + GPUBuffer& operator=(const GPUBuffer&) = delete; + + GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_), sz_(that.sz_) + { + that.buf_ = nullptr; // take over ownership + that.sz_ = 0; + } + + GPUBuffer& operator=(GPUBuffer&& that) noexcept + { + FreeBuf(); + std::swap(this->buf_, that.buf_); + std::swap(this->sz_, that.sz_); + return *this; + } + + private: + void FreeBuf() + { + HIP_CHECK(hipFree(buf_)); + buf_ = nullptr; + sz_ = 0; + } + + void* buf_ = nullptr; + size_t sz_ = 0; + }; + + // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is + // passed to capture the size. Must call AdjustToSize() after calling such a method + size_t* SizePtr() { return &sz_; } + + void AdjustToSize() + { + if(sz_ != gpu_buf_.size()) + { + gpu_buf_ = GPUBuffer(sz_); + } + } + +public: + explicit Workspace(size_t sz = 0) : sz_(sz) { AdjustToSize(); } + + Workspace(const Workspace&) = delete; + Workspace& operator=(const Workspace&) = delete; + Workspace(Workspace&&) = default; + Workspace& operator=(Workspace&&) = default; + + size_t size() const { return sz_; } + + void resize(size_t sz_in_bytes) + { + sz_ = sz_in_bytes; + AdjustToSize(); + } + + auto ptr() const { return gpu_buf_.ptr(); } + + auto ptr() { return gpu_buf_.ptr(); } + + template + void Write(const V& vec) + { + using T = typename V::value_type; + resize(vec.size() * sizeof(T)); + HIP_CHECK(hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice)); + } + + template + void ReadTo(V& vec) const + { + using T = typename V::value_type; + if(vec.size() * sizeof(T) != size()) + { + MIOPEN_LOG_E("vector of wrong size passed"); + std::abort(); + } + HIP_CHECK(hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost)); + } + + template + V Read() const + { + using T = typename V::value_type; + size_t num_elem = size() / sizeof(T); + V ret(num_elem); + ReadTo(ret); + return ret; + } + +private: + // miopen::Handle& handle_; + // miopen::Allocator::ManageDataPtr data_{}; + GPUBuffer gpu_buf_{}; + size_t sz_{}; +}; From c5a2384dc0f29682ed51aeccf9b981dbdf7e058f Mon Sep 17 00:00:00 2001 From: Chris Erb Date: Sat, 16 Dec 2023 14:25:15 -0600 Subject: [PATCH 4/4] [gtest] conversion for code coverage tests (#2580) --- test/CMakeLists.txt | 22 --- test/{immed_conv3d.cpp => conv3d.hpp} | 12 +- test/driver.hpp | 7 +- test/gtest/CMakeLists.txt | 2 +- test/gtest/conv3d_codecov.cpp | 172 +++++++++++++++++++++ test/gtest/conv_2d.hpp | 66 --------- test/gtest/conv_igemm_mlir_xdlops.cpp | 2 +- test/gtest/immed_conv2d_codecov.cpp | 206 ++++++++++++++++++++++++++ test/gtest/immed_conv3d_codecov.cpp | 206 ++++++++++++++++++++++++++ test/gtest/pooling2d_asymmetric.cpp | 136 +++++++++++++++++ test/gtest/pooling2d_codecov.cpp | 136 +++++++++++++++++ 
test/gtest/pooling2d_wide.cpp | 136 +++++++++++++++++ test/immed_conv2d.cpp | 62 -------- test/{pooling2d.cpp => pooling2d.hpp} | 2 - 14 files changed, 1005 insertions(+), 162 deletions(-) rename test/{immed_conv3d.cpp => conv3d.hpp} (91%) create mode 100644 test/gtest/conv3d_codecov.cpp delete mode 100644 test/gtest/conv_2d.hpp create mode 100644 test/gtest/immed_conv2d_codecov.cpp create mode 100644 test/gtest/immed_conv3d_codecov.cpp create mode 100644 test/gtest/pooling2d_asymmetric.cpp create mode 100644 test/gtest/pooling2d_codecov.cpp create mode 100644 test/gtest/pooling2d_wide.cpp delete mode 100644 test/immed_conv2d.cpp rename test/{pooling2d.cpp => pooling2d.hpp} (98%) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d35ed0c43f..ffaeb93228 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -673,20 +673,6 @@ function(add_custom_test NAME) set_tests_properties(${NAME} PROPERTIES DISABLED On) endif() endfunction() -if(${CODECOV_TEST}) - add_custom_test(test_conv3d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 2 4 4 4 4 --weights 2 4 1 1 1 --pads_strides_dilations 0 0 0 1 1 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} - ) - add_custom_test(test_immed_conv2d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 2 2 14 14 --weights 8 2 3 3 --pads_strides_dilations 0 0 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} - ) - add_custom_test(test_immed_conv3d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 1 4 4 4 4 --weights 2 4 3 3 3 --pads_strides_dilations 0 0 0 1 1 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} - ) - add_custom_test(test_pooling2d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 1, 192, 28, 28 --lens 2 2 --strides 2 2 --pads 0 0 ${MIOPEN_TEST_FLAGS_ARGS} - ) -endif() if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) add_custom_test(test_miopendriver_regression_issue_1576 SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED FLOAT_DISABLED HALF_ENABLED @@ -776,14 +762,6 @@ set(TEST_CONV_VERBOSE_F ${MIOPEN_TEST_FLOAT_ARG} --verbose --disable-backward-da set(TEST_CONV_VERBOSE_B ${MIOPEN_TEST_FLOAT_ARG} --verbose --disable-forward --disable-backward-weights) set(TEST_CONV_VERBOSE_W ${MIOPEN_TEST_FLOAT_ARG} --verbose --disable-forward --disable-backward-data) -add_custom_test(test_pooling2d_asymmetric SKIP_UNLESS_ALL HALF_ENABLED GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --all --dataset 1 --limit 0 ${MIOPEN_TEST_FLAGS_ARGS} -) - -add_custom_test(test_pooling2d_wide SKIP_UNLESS_ALL HALF_ENABLED GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --all --dataset 2 --limit 0 ${MIOPEN_TEST_FLAGS_ARGS} -) - set(IMPLICITGEMM_MLIR_ENV_F_XDLOPS ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvMlirIgemmFwdXdlops) set(IMPLICITGEMM_MLIR_ENV_B_XDLOPS ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvMlirIgemmBwdXdlops) set(IMPLICITGEMM_MLIR_ENV_W_XDLOPS ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvMlirIgemmWrWXdlops) diff --git a/test/immed_conv3d.cpp b/test/conv3d.hpp similarity index 91% rename from test/immed_conv3d.cpp rename to test/conv3d.hpp index a968cd6100..2b49c0f10e 100644 --- a/test/immed_conv3d.cpp +++ b/test/conv3d.hpp @@ -26,9 +26,9 @@ #include "conv_common.hpp" template -struct conv3d_driver : conv_driver +struct conv3d_driver : conv_driver { - conv3d_driver() : conv_driver() + conv3d_driver() : conv_driver() { this->add(this->input_dims, "input"); this->add(this->weight_tensor_dims, "weights"); @@ -37,16 +37,16 @@ struct conv3d_driver : conv_driver 
this->generate_data_limited(this->get_batch_sizes(), 1, {8})); this->add(this->input_channels, "input_channels", - this->generate_data_limited(this->get_input_channels(), 1, {2})); + this->generate_data_limited(this->get_input_channels(), 1, {32})); this->add(this->output_channels, "output_channels", - this->generate_data_limited(this->get_output_channels(), 1, {16})); + this->generate_data_limited(this->get_output_channels(), 1, {32})); this->add(this->spatial_dim_elements, "spatial_dim_elements", this->generate_data_limited(this->get_3d_spatial_dims(), 1, {16, 16, 16})); this->add(this->filter_dims, "filter_dims", - this->generate_data_limited(this->get_3d_filter_dims(), 2, {5, 5, 5})); + this->generate_data_limited(this->get_3d_filter_dims(), 2, {3, 3, 3})); this->add(this->pads_strides_dilations, "pads_strides_dilations", this->generate_data_limited(this->get_3d_pads_strides_dilations(), 2)); @@ -58,5 +58,3 @@ struct conv3d_driver : conv_driver this->add(this->out_layout, "out_layout", this->generate_data({"NCDHW"})); } }; - -int main(int argc, const char* argv[]) { test_drive(argc, argv); } diff --git a/test/driver.hpp b/test/driver.hpp index 825a0d4511..97896d6c5d 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -1242,6 +1242,11 @@ void test_drive_impl_1(std::string program_name, std::vector as) Driver d{}; d.program_name = program_name; + std::cout << program_name << " "; + for(const auto& str : as) + std::cout << str << " "; + std::cout << std::endl; + std::set keywords{ "--help", "-h", "--half", "--float", "--double", "--int8", "--bfloat16"}; d.parse(keyword_set{keywords}); @@ -1381,7 +1386,7 @@ template